예제 #1
0
        /// <summary>
        /// Hands a fixed Facebook ads endpoint to <c>getDataPinterest</c> so it can work with <paramref name="model"/>.
        /// </summary>
        /// <param name="model">Crawler model forwarded to <c>getDataPinterest</c>.</param>
        /// <param name="search_str">Not read by this method.</param>
        /// <param name="limit">Not read by this method.</param>
        /// <param name="bookmarks_str">Passed by reference to <c>getDataPinterest</c>.</param>
        /// <param name="page">Not read by this method.</param>
        /// <returns>Always <c>false</c>.</returns>
        public static bool Get_Tagged_Pins(ref CMS_CrawlerModels model, string search_str, int limit = 1, string bookmarks_str = null, int page = 1)
        {
            // The endpoint is hard-coded: page name, user id and the session tokens are all baked into the query string.
            const string adsEndpoint = "https://www.facebook.com/lifewithsunshine/ads/?ref=page_internal&dpr=1&ajaxpipe=1&ajaxpipe_token=AXh9WxHKM0f06Rq9&country=1&path=%2Flifewithsunshine%2Fads%2F&__user=100003324695675&__a=1&__dyn=5V4cjLx2ByK5A9UkKHqAyqomzFE9XG8GAdyeGDirWqF1G7UnGdwIhEnUF7yWCHxCEjCyEnyo88ObGubyRUC48G5WAxamjDK7GgPwzxuFS58-ER2KdyU8p94jUXVoS48nVV8Gicx2q5o4OmayrBy8GudAx6cw_xle9xmjx2Qm3GE-qp3FK4bUCaxKh1e5pVkdxCi78SaCCy89ooKHVohxyhu9K9BmFpEBq8IHGfio8l8imEggmKbKqify4cXJ2oS3m6ogUK8GE_WUWiUd9azEKiEDyp8ymaVeaDU8fiAx2miQhxdyopBAyEN4yprypVUV1bCxe9yEgy8LzU9FWDz8a8Z112HJ7VVHAyEsyUlzF8WEKU&__req=jsonp_3&__be=1&__pc=PHASED%3ADEFAULT&__rev=4066324&__spin_r=4066324&__spin_b=trunk&__spin_t=1530514908&__adt=3";

            getDataPinterest(adsEndpoint, model, "", ref bookmarks_str);

            return false;
        }
예제 #2
0
 /// <summary>
 /// Creates a product model with empty lists, a fresh crawler model and a
 /// date range running from 1 January 1990 up to the current local time.
 /// </summary>
 public CMS_ProductsModels()
 {
     // Empty collections so consumers never see a null list.
     ListTime = new List<SelectListItem>();
     ListQuantity = new List<SelectListItem>();
     listKeywords = new List<string>();
     listGroups = new List<string>();

     Crawler = new CMS_CrawlerModels();

     // Default date range: 1990-01-01 .. now.
     FromDate = new DateTime(1990, 1, 1);
     ToDate = DateTime.Now;
 }
        /// <summary>
        /// Returns the next batch of pins for the list view as the <c>_ListItem</c> partial.
        /// </summary>
        /// <remarks>
        /// Board ids come from three places, in this order: the <c>BoardID</c>/<c>GroupID</c> query
        /// parameters of <c>pinFilter.Url</c>, the ids already present in <c>pinFilter.LstBoardID</c>,
        /// and, when nothing usable is left, every board returned by <c>getListBoard()</c>.
        /// </remarks>
        /// <param name="pinFilter">Filter posted by the client; its board lists may be unbound (null).</param>
        /// <returns>
        /// The <c>_ListItem</c> partial view, or HTTP 400 when the filter is missing or any step throws.
        /// </returns>
        public ActionResult LoadScroll(PinFilterDTO pinFilter)
        {
            // Without a filter there is nothing to query. The original reached the same 400 through a swallowed NullReferenceException.
            if (pinFilter == null)
            {
                return new HttpStatusCodeResult(HttpStatusCode.BadRequest);
            }

            try
            {
                if (!string.IsNullOrEmpty(pinFilter.Url))
                {
                    NameValueCollection QueryString = CommonHelper.GetQueryParameters(pinFilter.Url);
                    var _Key   = QueryString["BoardID"] ?? "";
                    var _Group = QueryString["GroupID"] ?? "";

                    // The rest of this method treats LstBoardID as nullable, so create it before
                    // adding to it instead of failing the whole request.
                    if (pinFilter.LstBoardID == null && (!string.IsNullOrEmpty(_Key) || !string.IsNullOrEmpty(_Group)))
                    {
                        pinFilter.LstBoardID = new List<string>();
                    }

                    if (!string.IsNullOrEmpty(_Key))
                    {
                        pinFilter.LstBoardID.Add(_Key);
                    }
                    if (!string.IsNullOrEmpty(_Group))
                    {
                        pinFilter.LstGroupBoardID.Add(_Group);

                        // A group contributes all of its boards; AddRange rejects a null sequence.
                        var _lstKeywords = getListBoardByGroud(_Group);
                        if (_lstKeywords != null)
                        {
                            pinFilter.LstBoardID.AddRange(_lstKeywords);
                        }
                    }
                }

                // An empty first entry discards the whole posted list, so the fallback below applies.
                if (pinFilter.LstBoardID != null && pinFilter.LstBoardID.Count > 0
                    && string.IsNullOrEmpty(pinFilter.LstBoardID[0]))
                {
                    pinFilter.LstBoardID = null;
                }

                // No explicit boards: fall back to every known board.
                if (pinFilter.LstBoardID == null || pinFilter.LstBoardID.Count == 0)
                {
                    pinFilter.LstBoardID = getListBoard().Select(o => o.Value).ToList();
                }

                var modelCrawler = new CMS_CrawlerModels();
                if (pinFilter.LstBoardID != null && pinFilter.LstBoardID.Count > 0)
                {
                    var _pinModels = new List<PinsModels>();
                    var msg        = "";
                    int totalPin   = 0;
                    pinFilter.PageSize = Commons.PageSize;

                    if (_fac.GetPin(ref _pinModels, ref totalPin, pinFilter, ref msg))
                    {
                        modelCrawler.Pins = _pinModels;
                    }
                }

                return PartialView("_ListItem", modelCrawler);
            }
            catch (Exception)
            {
                // Any failure is reported to the client as a bad request, same as before.
                return new HttpStatusCodeResult(HttpStatusCode.BadRequest);
            }
        }
예제 #4
0
        /// <summary>
        /// Crawls the ads listing of a Facebook page: the first page through <c>CrawlerFb</c>, then a
        /// follow-up request through <c>CrawlerNextPage</c> when both a page id and a user id were found.
        /// </summary>
        /// <param name="url">Page URL; "ads/?country=1&amp;ref=page_internal" is appended to it as-is.</param>
        /// <param name="pins">Crawler model passed by reference to <c>CrawlerFb</c> and <c>CrawlerNextPage</c>.</param>
        /// <remarks>
        /// Every exception is caught and discarded, so callers cannot tell a failed crawl from an empty one.
        /// </remarks>
        public static void CrawlerAllFb(string url, ref CMS_CrawlerModels pins)
        {
            try
            {
                var page_Id = "";
                var user_Id = "";
                // Appended without adding a separator.
                // NOTE(review): presumably the caller passes a URL ending in "/" - confirm.
                url = url + "ads/?country=1&ref=page_internal";
                if (!string.IsNullOrEmpty(Cookies))
                {
                    // NOTE(review): the next line is damaged in this copy of the file (the "******" is a
                    // redaction artifact) and `end` is never declared in this scope, so the block does
                    // not compile as shown. The intent appears to be: locate "c_user=", move `start`
                    // just past the "=", then scan for the terminating ';' - confirm against the original source.
                    var start = Cookies.IndexOf("c_user="******"=", start) + 1;
                    for (int i = start; i < Cookies.Length; i++)
                    {
                        char key = Cookies[i];
                        if (key == ';')
                        {
                            end = i;
                            break;
                        }
                    }
                    // The user id is the text between `start` and the first ';' after it.
                    user_Id = Cookies.Substring(start, (end - start));
                }

                // Always true at this point: url was just concatenated with a non-empty literal.
                if (!string.IsNullOrEmpty(url))
                {
                    var end = url.IndexOf("/ads/");
                    // Searching from `end` finds the '/' at that same position, so `end` is unchanged
                    // when "/ads/" was found.
                    end = url.IndexOf("/", end);
                    var start = 0;
                    // Walk backwards from "/ads/" to the nearest '-'; the page id is taken to be the
                    // text between that '-' and "/ads/".
                    for (int i = end; i > 0; i--)
                    {
                        char key = url[i];
                        if (key == '-')
                        {
                            start = i;
                            break;
                        }
                    }
                    // When no '-' exists, start stays 0 and this yields everything from index 1 up to "/ads/".
                    page_Id = url.Substring(start + 1, (end - start - 1));
                }

                /* crawl first page */
                CrawlerFb(url, ref pins);

                /* check next page */
                if (!string.IsNullOrEmpty(page_Id) && !string.IsNullOrEmpty(user_Id))
                {
                    // The literal 8 is passed as the `cursor` argument of CrawlerNextPage.
                    CrawlerNextPage(page_Id, user_Id, 8, url, ref pins);
                }
            }
            // Deliberately broad: any parsing or network failure ends the crawl silently.
            catch (Exception ex) { }
        }
        /// <summary>
        /// Renders the detail view for one pin together with the other pins fetched for the same key.
        /// </summary>
        /// <param name="id">Identifier of the pin to show.</param>
        /// <param name="Key">Key forwarded to <c>CrawlerHelper.Get_Tagged_OrtherPins</c>.</param>
        /// <returns>The default view bound to the crawler model; whatever was filled in before a failure is still shown.</returns>
        public ActionResult ProductDetail(string id, string Key)
        {
            var viewModel = new CMS_CrawlerModels();

            try
            {
                // Main pin first, then the related ones.
                var pinDetail = new PinsModels();
                CrawlerHelper.Get_Tagged_PinsDetail(ref pinDetail, id);
                CrawlerHelper.Get_Tagged_OrtherPins(ref viewModel, Key, Commons.PinOrtherDefault, "", 1, id);

                viewModel.Pin = pinDetail;
            }
            catch (Exception)
            {
                // Errors are ignored on purpose; the view is rendered with the data gathered so far.
            }

            return View(viewModel);
        }
예제 #6
0
        /// <summary>
        /// Crawls a Facebook page's ads: the first listing page, the detail of every non-dynamic pin
        /// found on it, and then the following page when a page id and user id are available.
        /// </summary>
        /// <param name="url">Page URL; normalised through <c>CheckUrl</c> before use.</param>
        /// <param name="cookie">Session cookie sent with every request; the user id is read from it.</param>
        /// <param name="pins">Crawler model that receives the pins.</param>
        /// <remarks>
        /// Best-effort: failures never propagate to the caller, but they are now written to the error
        /// log instead of being dropped silently.
        /// </remarks>
        public static void CrawlerAllFb(string url, string cookie, ref CMS_CrawlerModels pins)
        {
            try
            {
                /* pre-processing */
                var user_Id = GetUserIDFromCookies(cookie);
                url = CheckUrl(url);

                /* crawl first page */
                string _pageId = "";
                NSLog.Logger.Info("Start Craw :" + url);
                // NOTE(review): this writes the full session cookie to the log; consider masking it.
                NSLog.Logger.Info("Cookie : " + cookie);
                int countExp = 0;
                CrawlerFb(url, cookie, ref pins, ref countExp, ref _pageId);

                /* crawl detail */
                if (pins != null && pins.Pins != null && pins.Pins.Any())
                {
                    NSLog.Logger.Info("Total Pin master :" + pins.Pins.Count);

                    // The original also reset the shared countExp from inside this lambda. That write
                    // raced between workers and was never read before the reset further down, so it
                    // was dropped.
                    Parallel.ForEach(pins.Pins, (item) =>
                    {
                        if (!item.IsDynamic)
                        {
                            // Fixed 5 s pause before each detail request.
                            Thread.Sleep(5000);
                            CrawlerFBDetail(item.LinkApi, item.FbIds, cookie, ref item);
                        }
                    });
                }

                /* check next page ID */
                _pageId = string.IsNullOrEmpty(_pageId) ? GetNextPageID(url) : _pageId;

                /* crawl next page */
                if (!string.IsNullOrEmpty(_pageId) && !string.IsNullOrEmpty(user_Id))
                {
                    countExp = 0;
                    CrawlerNextPage(_pageId, user_Id, 8, url, cookie, ref countExp, ref pins);

                    var totalAfterNext = (pins != null && pins.Pins != null) ? pins.Pins.Count : 0;
                    NSLog.Logger.Info("Total Pin master :" + totalAfterNext);
                }

                // Same guard as above, so the closing log line is written even when no pins were produced.
                var totalPin = (pins != null && pins.Pins != null) ? pins.Pins.Count : 0;
                NSLog.Logger.Info("End Craw :" + url + " :" + totalPin);
            }
            catch (Exception ex)
            {
                // Still swallowed so callers keep their best-effort contract, but no longer invisible.
                NSLog.Logger.Error("ErrorCrawlerAllFb: " + url, ex);
            }
        }
예제 #7
0
        /// <summary>
        /// Crawls the Facebook page stored under the given keyword id and saves the resulting pins.
        /// </summary>
        /// <remarks>
        /// A keyword is crawled when it has never been updated, when its last update is more than five
        /// minutes old, or when it has not been touched since creation. The update timestamp is written
        /// before the crawl starts and is left in place when the crawl yields nothing, so the five-minute
        /// window also throttles failed attempts.
        /// </remarks>
        /// <param name="Id">Id of the <c>CMS_KeyWord</c> row to crawl.</param>
        /// <param name="createdBy">User recorded as the updater and as the owner of stored pins.</param>
        /// <param name="msg">Receives an error message when the crawl throws or when saving reports one.</param>
        /// <returns>
        /// <c>false</c> when a crawl ran but produced or stored nothing, or when an exception occurred;
        /// <c>true</c> otherwise, including when the keyword was not found or was crawled too recently.
        /// </returns>
        public bool CrawlData(string Id, string createdBy, ref string msg)
        {
            NSLog.Logger.Info("CrawlData: " + Id);
            var model    = new CMS_CrawlerModels();
            var sequence = 0;
            var key      = "";

            var result = true;

            try
            {
                using (var _db = new CMS_Context())
                {
                    /* get key by ID */
                    var keyWord = _db.CMS_KeyWord.Where(o => o.ID == Id).FirstOrDefault();
                    if (keyWord != null)
                    {
                        sequence = keyWord.Sequence;
                        key      = keyWord.KeyWord;

                        /* check time span crawl */
                        var timeSpanCrawl = DateTime.Now - keyWord.UpdatedDate;

                        // A missing UpdatedDate makes the span null; treat that as "never crawled"
                        // rather than letting .Value throw and fail the call.
                        var isDue = !timeSpanCrawl.HasValue
                                    || timeSpanCrawl.Value.TotalMinutes > 5 /* 5min to crawl data again */
                                    || keyWord.UpdatedDate == keyWord.CreatedDate;
                        if (isDue)
                        {
                            /* update crawler date up front so parallel requests see the keyword as busy */
                            keyWord.UpdatedDate = DateTime.Now;
                            keyWord.UpdatedBy   = createdBy;
                            _db.SaveChanges();

                            /* call crawler api to crawl data */
                            CMSPinFactory _fac = new CMSPinFactory();

                            var listAcc    = _db.CMS_Account.Where(o => o.Status == (byte)Commons.EStatus.Active && o.IsActive).ToList();
                            var listCookie = listAcc.Select(x => x.Cookies).ToList();
                            var _cookie    = CommonHelper.RamdomCookie(listCookie);
                            CrawlerFbHelpers_v2.CrawlerAllFb(keyWord.KeyWord, _cookie, ref model);

                            var res = false;
                            if (model.Pins != null && model.Pins.Count > 0)
                            {
                                res = _fac.CreateOrUpdate(model.Pins, keyWord.ID, createdBy, ref msg);
                            }

                            if (res)
                            {
                                keyWord.UpdatedDate = DateTime.Now;
                                _db.SaveChanges();
                            }
                            else
                            {
                                // The timestamp written above is intentionally not rolled back.
                                result = false;
                            }
                        }
                    }
                }

                var pinCount = model.Pins != null ? model.Pins.Count : 0;
                LogHelper.WriteLogs(sequence.ToString() + " " + key, "Num post: " + pinCount.ToString());
                NSLog.Logger.Info("ResponseCrawlData", result.ToString());
            }
            catch (Exception ex)
            {
                msg    = "Crawl data is unsuccessfully.";
                result = false;

                LogHelper.WriteLogs("ErrorCrawlData: " + Id, JsonConvert.SerializeObject(ex));
                NSLog.Logger.Error("ErrorCrawlData: " + Id, ex);
            }

            return result;
        }
예제 #8
0
        /// <summary>
        /// Downloads one Facebook ads listing page and appends every post found in it to
        /// <c>pins.Pins</c>.
        /// </summary>
        /// <remarks>
        /// Descriptions, owner names and post ids are collected into separate lists in document
        /// order and then matched to the media containers (class "mtm") by position. A container
        /// can yield "normal" pins (one per image link) and "dynamic" pins (one per carousel item).
        /// When no owner-name node is present, <c>pins.ErrorStatus</c> is set to <c>AccBlocked</c>.
        /// Positional lookups are bounds-checked, so a post without a matching description, name or
        /// id is stored with that field unset instead of aborting the rest of the page.
        /// Failures are logged and never thrown to the caller.
        /// </remarks>
        /// <param name="url">Absolute URL of the listing page.</param>
        /// <param name="pins">Crawler model that receives the pins and the error status.</param>
        public static void CrawlerFb(string url, ref CMS_CrawlerModels pins)
        {
            try
            {
                // The proxy is still drawn even though the request does not use it (see the disabled line below).
                int    _port          = 0;
                string _proxy         = CommonHelper.RamdomProxy(ref _port);
                Uri    uri            = new Uri(url);
                var    httpWebRequest = (HttpWebRequest)WebRequest.Create(uri);
                //httpWebRequest.Proxy = new WebProxy(_proxy,_port);
                /* request need cookie & user agent */
                httpWebRequest.Headers["Cookie"] = Cookies;
                httpWebRequest.UserAgent         = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0";
                httpWebRequest.Accept            = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";

                httpWebRequest.Timeout = 100000;

                // Dispose the response as well as the reader so the connection is released on every path.
                using (var httpResponse = (HttpWebResponse)httpWebRequest.GetResponse())
                using (var streamReader = new StreamReader(httpResponse.GetResponseStream()))
                {
                    var html = streamReader.ReadToEnd();
                    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                    doc.LoadHtml(html);

                    // Description: first <p> of every post-content div, "" when the div has none.
                    List<HtmlNode> nodeHtml = doc.DocumentNode.Descendants().Where
                                                  (x => (x.Name == "div" && x.Attributes["class"] != null &&
                                                         x.Attributes["class"].Value.Contains("_5pbx userContent _3576"))).ToList();

                    var ListDescription = new List<string>();
                    foreach (var item in nodeHtml)
                    {
                        var firstParagraph = item.Descendants("p").FirstOrDefault();
                        var description    = firstParagraph != null ? firstParagraph.InnerText : "";
                        if (!string.IsNullOrEmpty(description))
                        {
                            description = description.Replace("&quot;", "");
                        }
                        ListDescription.Add(description);
                    }

                    // Name: first <a> of every header div, "" when the div has none.
                    List<HtmlNode> nodehtmlName = doc.DocumentNode.Descendants().Where
                                                      (x => (x.Name == "div" && x.Attributes["class"] != null &&
                                                             x.Attributes["class"].Value.Contains("_6a _5u5j _6b"))).ToList();

                    var ListName = new List<string>();
                    if (nodehtmlName.Count > 0)
                    {
                        foreach (var item in nodehtmlName)
                        {
                            var firstLink = item.Descendants("a").FirstOrDefault();
                            var name      = firstLink != null ? firstLink.InnerText : "";
                            if (!string.IsNullOrEmpty(name))
                            {
                                name = name.Replace("&quot;", "");
                            }
                            ListName.Add(name);
                        }
                    }
                    else
                    {
                        /* no owner-name node at all: reported as a blocked account */
                        pins.ErrorStatus = (byte)Commons.EErrorStatus.AccBlocked;
                    }

                    // fb_id: derived from the id attribute of every post wrapper that has one.
                    var nodeFb_Id = doc.DocumentNode.Descendants().Where
                                    (
                        x => (x.Name == "div" && x.Attributes["class"] != null && x.Attributes["class"].Value.Contains("_5pcp _5lel _2jyu _232_"))
                                    ).ToList();
                    List<string> fb_ids = new List<string>();
                    foreach (var item in nodeFb_Id)
                    {
                        var strfb_id = item.GetAttributeValue("id", "");
                        if (!string.IsNullOrEmpty(strfb_id))
                        {
                            var fb_id = findFbId_v2(strfb_id);
                            fb_ids.Add(!string.IsNullOrEmpty(fb_id) ? fb_id : "");
                        }
                    }

                    LogHelper.WriteLogs("fb_ids: " + url, JsonConvert.SerializeObject(fb_ids));

                    // node html image
                    List<HtmlNode> nodeHtmlImage = doc.DocumentNode.Descendants().Where
                                                       (x => (x.Name == "div" && x.Attributes["class"] != null &&
                                                              x.Attributes["class"].Value.Contains("mtm"))).ToList();

                    var index = 0;
                    foreach (var item in nodeHtmlImage)
                    {
                        // Shared by all links of this container, so ids accumulate across them.
                        List<string> fb_id = new List<string>();

                        // post normal
                        foreach (var itemImage in item.Descendants("a").ToList())
                        {
                            var _image     = itemImage.GetAttributeValue("data-ploi", "");
                            var _apiDetail = itemImage.GetAttributeValue("href", "");
                            if (!string.IsNullOrEmpty(_image))
                            {
                                _image = _image.Replace("amp;", "");
                            }

                            if (string.IsNullOrEmpty(_image) || string.IsNullOrEmpty(_apiDetail))
                            {
                                continue;
                            }

                            var Pin = new PinsModels();

                            // The fifth path segment of the detail link is used as an id.
                            var Splits = _apiDetail.Split('/');
                            if (Splits.Length >= 5)
                            {
                                fb_id.Add(Splits[4]);
                            }

                            // index must be strictly below Count; ">=" let index == Count through and threw.
                            if (index < fb_ids.Count && !string.IsNullOrEmpty(fb_ids[index]))
                            {
                                fb_id.Add(fb_ids[index]);
                            }

                            CrawlerFBDetail(_apiDetail, fb_id, ref Pin);
                            Pin.ImageURL = _image;
                            if (index < ListDescription.Count)
                            {
                                Pin.Description = ListDescription[index];
                            }

                            if (index < ListName.Count)
                            {
                                Pin.OwnerName = ListName[index];
                            }
                            pins.Pins.Add(Pin);
                        }

                        //post dynamic
                        var nodeChildDynamic = item.Descendants("ul").ToList();
                        if (nodeChildDynamic.Count > 0)
                        {
                            var _doc = new HtmlDocument();
                            _doc.LoadHtml(nodeChildDynamic[0].InnerHtml);
                            var nodeLI = _doc.DocumentNode.Descendants().Where(
                                x => (x.Name == "li" && x.Attributes["class"] != null && x.Attributes["class"].Value.Contains("_5ya"))).ToList();

                            foreach (var itemLI in nodeLI)
                            {
                                var Pin = new PinsModels();

                                var firstImage = itemLI.Descendants("img").FirstOrDefault();
                                if (firstImage != null)
                                {
                                    var _image = firstImage.GetAttributeValue("src", "");
                                    if (!string.IsNullOrEmpty(_image))
                                    {
                                        _image       = _image.Replace("amp;", "");
                                        Pin.ImageURL = _image;
                                        var PinId = findFbOh(_image);

                                        // Without a post id for this position the pin gets no ID and is skipped below.
                                        if (index < fb_ids.Count)
                                        {
                                            Pin.ID = PinId + "_" + fb_ids[index];
                                        }
                                    }
                                }

                                var firstLink = itemLI.Descendants("a").FirstOrDefault();
                                if (firstLink != null)
                                {
                                    Pin.Link = firstLink.GetAttributeValue("href", "");
                                }

                                //description: first div whose last child is a text node (childless divs are ignored)
                                var _description = itemLI.Descendants("div")
                                                         .FirstOrDefault(x => x.LastChild != null && x.LastChild.Name.Equals("#text"));
                                if (_description != null)
                                {
                                    Pin.Description = _description.InnerText;
                                }

                                if (index < ListName.Count)
                                {
                                    Pin.OwnerName = ListName[index];
                                }

                                if (!string.IsNullOrEmpty(Pin.ID))
                                {
                                    pins.Pins.Add(Pin);
                                }
                            }
                        }
                        index++;
                    }
                }
            }
            catch (Exception ex)
            {
                LogHelper.WriteLogs("ErrorCrawlerFB: " + url, JsonConvert.SerializeObject(ex));
                NSLog.Logger.Error("Crawler Fb: ", ex);
            }
        }
예제 #9
0
        public static void CrawlerNextPage(string pageId, string userId, int cursor, string referer, ref CMS_CrawlerModels pins)
        {
            int    _port          = 0;
            string _proxy         = CommonHelper.RamdomProxy(ref _port);
            var    url            = "https://www.facebook.com/pages/ads/more/?cursor=" + cursor + "&surface=www_page_ads&unit_count=" + cursor + "&country=1&dpr=1&__user="******"&__a=1&__req=v&__be=1&__pc=PHASED%3ADEFAULT&__rev=4075583&__spin_r=4075583&__spin_b=trunk&__spin_t=1530846023&page_id=" + pageId + "";
            Uri    uri            = new Uri(url);
            var    httpWebRequest = (HttpWebRequest)WebRequest.Create(uri);

            //httpWebRequest.Proxy = new WebProxy(_proxy, _port);
            /* request need cookie & user agent */
            httpWebRequest.Headers["Cookie"] = Cookies;
            httpWebRequest.Referer           = referer;
            httpWebRequest.UserAgent         = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0";
            httpWebRequest.Accept            = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";

            httpWebRequest.Timeout = 100000;
            var httpResponse = (HttpWebResponse)httpWebRequest.GetResponse();

            using (var streamReader = new StreamReader(httpResponse.GetResponseStream()))
            {
                var html = streamReader.ReadToEnd();
                if (!string.IsNullOrEmpty(html))
                {
                    html = html.Replace("for (;;);", "");
                    JavaScriptSerializer jsonSerializer = new JavaScriptSerializer();
                    dynamic dobj   = jsonSerializer.Deserialize <dynamic>(html);
                    var     domops = dobj["domops"];
                    if (domops != null)
                    {
                        var _objhtmt = domops[0][3];
                        if (_objhtmt != null)
                        {
                            var _html = _objhtmt["__html"];
                            if (!string.IsNullOrEmpty(_html))
                            {
                                var htmlDoc = new HtmlDocument();
                                htmlDoc.LoadHtml(_html);

                                List <HtmlNode> nodeHtml = htmlDoc.DocumentNode.Descendants().Where
                                                               (x => (x.Name == "div" && x.Attributes["class"] != null &&
                                                                      x.Attributes["class"].Value.Contains("_5pbx userContent _3576"))).ToList();

                                var ListDescription = new List <string>();
                                if (nodeHtml != null && nodeHtml.Count > 0)
                                {
                                    foreach (var item in nodeHtml)
                                    {
                                        var NodeDescription = item.Descendants("p").ToList();
                                        if (NodeDescription != null)
                                        {
                                            var description = NodeDescription[0].InnerText;
                                            if (!string.IsNullOrEmpty(description))
                                            {
                                                description = description.Replace("&quot;", "");
                                            }
                                            ListDescription.Add(description);
                                        }
                                        else
                                        {
                                            ListDescription.Add("");
                                        }
                                    }
                                }

                                //Name
                                List <HtmlNode> nodehtmlName = htmlDoc.DocumentNode.Descendants().Where
                                                                   (x => (x.Name == "div" && x.Attributes["class"] != null &&
                                                                          x.Attributes["class"].Value.Contains("_6a _5u5j _6b"))).ToList();

                                var ListName = new List <string>();
                                if (nodehtmlName != null && nodehtmlName.Count > 0)
                                {
                                    foreach (var item in nodehtmlName)
                                    {
                                        var NodeName = item.Descendants("a").ToList();
                                        if (NodeName != null)
                                        {
                                            var name = NodeName[0].InnerText;
                                            if (!string.IsNullOrEmpty(name))
                                            {
                                                name = name.Replace("&quot;", "");
                                            }
                                            ListName.Add(name);
                                        }
                                        else
                                        {
                                            ListName.Add("");
                                        }
                                    }
                                }

                                // fb_id
                                var nodeFb_Id = htmlDoc.DocumentNode.Descendants().Where
                                                (
                                    x => (x.Name == "div" && x.Attributes["class"] != null && x.Attributes["class"].Value.Contains("_5pcp _5lel _2jyu _232_"))
                                                ).ToList();

                                List <string> fb_ids = new List <string>();
                                if (nodeFb_Id != null && nodeFb_Id.Count > 0)
                                {
                                    foreach (var item in nodeFb_Id)
                                    {
                                        var strfb_id = item.GetAttributeValue("id", "");
                                        if (!string.IsNullOrEmpty(strfb_id))
                                        {
                                            //var split = strfb_id.Split(';').ToList();
                                            //if (split != null && split.Count > 1)
                                            //{
                                            //    var fb_id = split[1];
                                            //    fb_ids.Add(fb_id);
                                            //}
                                            //else
                                            //{
                                            //    fb_ids.Add("");
                                            //}

                                            var fb_id = findFbId_v2(strfb_id);
                                            if (!string.IsNullOrEmpty(fb_id))
                                            {
                                                fb_ids.Add(fb_id);
                                            }
                                            else
                                            {
                                                fb_ids.Add("");
                                            }
                                        }
                                    }
                                }

                                List <HtmlNode> nodeHtmlImage = htmlDoc.DocumentNode.Descendants().Where
                                                                    (x => (x.Name == "div" && x.Attributes["class"] != null &&
                                                                           x.Attributes["class"].Value.Contains("mtm"))).ToList();

                                if (nodeHtmlImage != null && nodeHtmlImage.Count > 0)
                                {
                                    var index = 0;
                                    foreach (var item in nodeHtmlImage)
                                    {
                                        List <string> fb_id          = new List <string>();
                                        var           nodeChildImage = item.Descendants("a").ToList();
                                        if (nodeChildImage != null && nodeChildImage.Count > 0)
                                        {
                                            foreach (var itemImage in nodeChildImage)
                                            {
                                                var _image     = itemImage.GetAttributeValue("data-ploi", "");
                                                var _apiDetail = itemImage.GetAttributeValue("href", "");
                                                if (!string.IsNullOrEmpty(_image))
                                                {
                                                    _image = _image.Replace("amp;", "");
                                                }

                                                var Pin = new PinsModels();
                                                if (!string.IsNullOrEmpty(_image) && !string.IsNullOrEmpty(_apiDetail))
                                                {
                                                    var Splits = _apiDetail.Split('/').ToList();
                                                    if (Splits != null && Splits.Count >= 5)
                                                    {
                                                        fb_id.Add(Splits[4]);
                                                    }

                                                    if (fb_ids != null && fb_ids.Count >= index /*&& nodeChildImage.Count == 1*/)
                                                    {
                                                        if (!string.IsNullOrEmpty(fb_ids[index]))
                                                        {
                                                            fb_id.Add(fb_ids[index]);
                                                        }
                                                    }
                                                    CrawlerFBDetail(_apiDetail, fb_id, ref Pin);
                                                    Pin.ImageURL = _image;
                                                    if (ListDescription != null && ListDescription.Count >= index)
                                                    {
                                                        Pin.Description = ListDescription[index];
                                                    }

                                                    if (ListName != null && ListName.Count >= index)
                                                    {
                                                        Pin.OwnerName = ListName[index];
                                                    }
                                                    pins.Pins.Add(Pin);
                                                }
                                            }
                                        }

                                        //post dynamic
                                        var nodeChildDynamic = item.Descendants("ul").ToList();
                                        if (nodeChildDynamic != null && nodeChildDynamic.Count > 0)
                                        {
                                            var _doc = new HtmlDocument();
                                            _doc.LoadHtml(nodeChildDynamic[0].InnerHtml);
                                            var nodeLI = _doc.DocumentNode.Descendants().Where(
                                                x => (x.Name == "li" && x.Attributes["class"] != null && x.Attributes["class"].Value.Contains("_5ya"))).ToList();
                                            if (nodeLI != null && nodeLI.Count > 0)
                                            {
                                                foreach (var itemLI in nodeLI)
                                                {
                                                    var Pin         = new PinsModels();
                                                    var nodeLIImage = itemLI.Descendants("img").ToList();
                                                    if (nodeLIImage != null && nodeLIImage.Count > 0)
                                                    {
                                                        var _image = nodeLIImage[0].GetAttributeValue("src", "");
                                                        if (!string.IsNullOrEmpty(_image))
                                                        {
                                                            _image       = _image.Replace("amp;", "");
                                                            Pin.ImageURL = _image;
                                                            var PinId = findFbOh(_image);
                                                            Pin.ID = PinId + "_" + fb_ids[index];
                                                        }
                                                    }

                                                    var nodeLink = itemLI.Descendants("a").ToList();
                                                    if (nodeLink != null && nodeLink.Count > 0)
                                                    {
                                                        var _link = nodeLink[0].GetAttributeValue("href", "");
                                                        Pin.Link = _link;
                                                    }

                                                    //description
                                                    var nodeLIDescription = itemLI.Descendants("div").ToList();
                                                    if (nodeLIDescription != null && nodeLIDescription.Count > 0)
                                                    {
                                                        var _description = nodeLIDescription.Where(x => x.LastChild.Name.Equals("#text")).FirstOrDefault();
                                                        if (_description != null)
                                                        {
                                                            Pin.Description = _description.InnerText;
                                                        }
                                                    }
                                                    if (ListName != null && ListName.Count >= index)
                                                    {
                                                        Pin.OwnerName = ListName[index];
                                                    }
                                                    if (!string.IsNullOrEmpty(Pin.ID))
                                                    {
                                                        pins.Pins.Add(Pin);
                                                    }
                                                }
                                            }
                                        }
                                        index++;
                                    }
                                }
                            }
                            else
                            {
                                return;
                            }
                        }
                    }
                }
            }

            // đệ quy craweler next page
            cursor = cursor + 8;
            CrawlerNextPage(pageId, userId, cursor, referer, ref pins);
        }
예제 #10
0
        /// <summary>
        /// Requests one batch (unit_count=8) of a Facebook page's ads feed starting at <paramref name="cursor"/>,
        /// passes the returned HTML fragment to <c>CrawlerDataFacebook</c>, crawls the detail of every collected
        /// pin that is not dynamic and still has no ID, and then calls itself with the cursor advanced by 8.
        /// </summary>
        /// <param name="pageId">Page identifier sent as <c>page_id</c>; handed by reference to <c>CrawlerDataFacebook</c>.</param>
        /// <param name="userId">Value sent as the <c>__user</c> query parameter.</param>
        /// <param name="cursor">Offset of the first unit to request.</param>
        /// <param name="referer">Value of the Referer header.</param>
        /// <param name="cookie">Raw Cookie header; also forwarded to <c>CrawlerFBDetail</c>.</param>
        /// <param name="countExp">Shared retry counter; a failed request is retried only while it is at most 5.</param>
        /// <param name="pins">Accumulator that receives the crawled pins.</param>
        /// <remarks>
        /// Paging stops when the response is not 200, is empty, has no <c>domops</c> entry or carries no
        /// <c>__html</c> payload. No exception raised inside the try blocks leaves this method: failures are
        /// logged and, where the handlers below say so, retried.
        /// </remarks>
        public static void CrawlerNextPage(string pageId, string userId, int cursor, string referer, string cookie, ref int countExp, ref CMS_CrawlerModels pins)
        {
            int    _port  = 0;
            string _proxy = CommonHelper.RamdomProxy(ref _port); // only consumed by the disabled proxy line below
            // NOTE(review): the "__user" value was masked ("******") in this copy of the source, which left the
            // expression uncompilable. userId is the only parameter that was otherwise unused, so it is restored
            // here - confirm against the original repository.
            var url = "https://www.facebook.com/pages/ads/more/?cursor=" + cursor + "&surface=www_page_ads&unit_count=8&country=1&dpr=1&__user=" + userId + "&__a=1&__req=v&__be=1&__pc=PHASED%3ADEFAULT&__rev=4075583&__spin_r=4075583&__spin_b=trunk&__spin_t=1530846023&page_id=" + pageId;
            var httpWebRequest = (HttpWebRequest)WebRequest.Create(new Uri(url));

            //httpWebRequest.Proxy = new WebProxy(_proxy, _port);
            httpWebRequest.KeepAlive = false;
            /* the request needs the session cookie and a browser user agent */
            httpWebRequest.Headers["Cookie"] = cookie;
            httpWebRequest.Referer           = referer;
            httpWebRequest.UserAgent         = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0";
            httpWebRequest.Accept            = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
            httpWebRequest.Timeout           = 9000000; // milliseconds (150 minutes)
            try
            {
                using (HttpWebResponse httpResponse = (HttpWebResponse)httpWebRequest.GetResponse())
                {
                    try
                    {
                        if (httpResponse.StatusCode != HttpStatusCode.OK)
                        {
                            return;
                        }

                        using (var streamReader = new StreamReader(httpResponse.GetResponseStream()))
                        {
                            var html = streamReader.ReadToEnd();
                            if (string.IsNullOrEmpty(html))
                            {
                                return;
                            }

                            // The JSON body is prefixed with "for (;;);", which must be removed before parsing.
                            html = html.Replace("for (;;);", "");
                            JavaScriptSerializer jsonSerializer = new JavaScriptSerializer();
                            dynamic dobj   = jsonSerializer.Deserialize <dynamic>(html);
                            var     domops = dobj["domops"];
                            if (domops == null)
                            {
                                return;
                            }

                            var _objhtmt = domops[0][3];
                            if (_objhtmt == null)
                            {
                                return;
                            }

                            var _html = _objhtmt["__html"];
                            if (string.IsNullOrEmpty(_html))
                            {
                                // No HTML fragment in the response: nothing more to page through.
                                return;
                            }

                            CrawlerDataFacebook(_html, true, ref pins, ref pageId);
                            // Close the reader (and its response stream) now instead of holding it open
                            // during the detail crawl and the recursive call below.
                            streamReader.Close();
                            Thread.Sleep(500);

                            /* crawl detail */
                            if (pins != null && pins.Pins != null && pins.Pins.Any())
                            {
                                var totalPin = pins.Pins.Count;
                                NSLog.Logger.Info("Total Pin master :" + totalPin);
                                Parallel.ForEach(pins.Pins, (item) =>
                                {
                                    if (!item.IsDynamic && string.IsNullOrEmpty(item.ID))
                                    {
                                        Thread.Sleep(5000);
                                        CrawlerFBDetail(item.LinkApi, item.FbIds, cookie, ref item);
                                    }
                                });
                            }

                            // recurse to crawl the next page
                            cursor = cursor + 8;
                            CrawlerNextPage(pageId, userId, cursor, referer, cookie, ref countExp, ref pins);
                        }
                    }
                    catch (IOException exIO)
                    {
                        NSLog.Logger.Info("Crawl next page error io exception : " + url + " ", exIO.Message);
                        RetryCrawlerNextPage(pageId, userId, cursor, referer, cookie, ref countExp, ref pins);
                    }
                    catch (Exception ex)
                    {
                        // This handler used to discard the exception; log it so that parsing and
                        // processing failures are visible.
                        NSLog.Logger.Error("Crawl next page error :", ex);
                        if (httpResponse.StatusCode == HttpStatusCode.NotFound)
                        {
                            RetryCrawlerNextPage(pageId, userId, cursor, referer, cookie, ref countExp, ref pins);
                        }
                    }
                }
            }
            catch (WebException ex)
            {
                NSLog.Logger.Info("Crawl next page error : " + url + " " + ex.Message);
                if (ex.Status == WebExceptionStatus.ProtocolError && ex.Response != null)
                {
                    // An HTTP error status was returned: only a 404 is retried.
                    var resp = (HttpWebResponse)ex.Response;
                    if (resp.StatusCode == HttpStatusCode.NotFound)
                    {
                        RetryCrawlerNextPage(pageId, userId, cursor, referer, cookie, ref countExp, ref pins);
                    }
                }
                else
                {
                    // Any other WebException status (or a protocol error without a response) is retried.
                    RetryCrawlerNextPage(pageId, userId, cursor, referer, cookie, ref countExp, ref pins);
                }
            }
            catch (IOException exIO)
            {
                NSLog.Logger.Info("Crawl next page error io exception : " + url + " ", exIO.Message);
                RetryCrawlerNextPage(pageId, userId, cursor, referer, cookie, ref countExp, ref pins);
            }
            catch (Exception ex)
            {
                NSLog.Logger.Error("Crawl next page error :", ex);
            }
            // httpWebRequest.Abort();//cancel request
        }

        /// <summary>
        /// Waits 500 ms and, while <paramref name="countExp"/> is at most 5, increments it and requests the
        /// same cursor again through <c>CrawlerNextPage</c>.
        /// </summary>
        private static void RetryCrawlerNextPage(string pageId, string userId, int cursor, string referer, string cookie, ref int countExp, ref CMS_CrawlerModels pins)
        {
            Thread.Sleep(500);
            if (countExp <= 5)
            {
                countExp = countExp + 1;
                CrawlerNextPage(pageId, userId, cursor, referer, cookie, ref countExp, ref pins);
            }
        }
예제 #11
0
        /// <summary>
        /// Fetches pins for the pin <paramref name="pinId"/> page by page through <c>getDataPinterest</c>,
        /// following the bookmark handed back by each call until it is empty or <paramref name="limit"/>
        /// pages have been requested.
        /// </summary>
        /// <param name="model">Crawler model passed on to <c>getDataPinterest</c>.</param>
        /// <param name="search_str">Search text sent as <c>search_query</c>.</param>
        /// <param name="limit">Highest page number that is still requested.</param>
        /// <param name="bookmarks_str">Bookmark of the page to request; null or empty for the first page.</param>
        /// <param name="page">Number of the page being requested.</param>
        /// <param name="pinId">Pin identifier used in the URL and in the request options.</param>
        /// <returns>Always <c>false</c>.</returns>
        public static bool Get_Tagged_OrtherPins(ref CMS_CrawlerModels model, string search_str, int limit = 1, string bookmarks_str = null, int page = 1, string pinId = "")
        {
            if (page > limit)
            {
                return(false);
            }

            // A non-empty bookmark means this is a follow-up page; only then is "bookmarks" sent.
            object payload;
            if (string.IsNullOrEmpty(bookmarks_str))
            {
                payload = new
                {
                    options = new
                    {
                        field_set_key          = "base_grid",
                        pin                    = pinId,
                        prepend                = false,
                        search_query           = search_str,
                        source                 = "search",
                        top_level_source       = "search",
                        top_level_source_depth = 1,
                        context_pin_ids        = new string[] { }
                    },
                    context = new
                    {
                    },
                };
            }
            else
            {
                payload = new
                {
                    options = new
                    {
                        field_set_key          = "base_grid",
                        pin                    = pinId,
                        prepend                = false,
                        search_query           = search_str,
                        source                 = "search",
                        top_level_source       = "search",
                        top_level_source_depth = 1,
                        bookmarks              = new string[] { bookmarks_str },
                        context_pin_ids        = new string[] { }
                    },
                    context = new
                    {
                    },
                };
            }

            // Serialize the payload and strip line breaks and tabs from the JSON text.
            string   json       = JsonConvert.SerializeObject(payload);
            string[] stripChars = new string[] { "\n", "\r", "\t" };
            string[] stripWith  = new string[] { "", "", "" };
            string   data       = Preg_replace(json, stripChars, stripWith);

            // data = HttpContext.Current.Server.UrlEncode(data);
            var requestUrl = Commons.HostApiOrtherPin + "/pin/" + pinId + "/" + "&data=" + data + "&_=" + GetTimestamp(DateTime.Now);

            // getDataPinterest writes the next page's bookmark into nextBookmark.
            string nextBookmark = "";
            getDataPinterest(requestUrl, model, pinId, ref nextBookmark);

            if (!string.IsNullOrEmpty(nextBookmark))
            {
                Get_Tagged_OrtherPins(ref model, search_str, limit, nextBookmark, page + 1, pinId);
            }
            return(false);
        }
예제 #12
0
        /// <summary>
        /// Runs a pin search for <paramref name="search_str"/> page by page through <c>getDataPinterest</c>,
        /// following the bookmark handed back by each call until it is empty or <paramref name="limit"/>
        /// pages have been requested.
        /// </summary>
        /// <param name="model">Crawler model passed on to <c>getDataPinterest</c>.</param>
        /// <param name="search_str">Search text; appended to <c>Commons.HostApi</c> and sent as <c>query</c>.</param>
        /// <param name="limit">Highest page number that is still requested.</param>
        /// <param name="bookmarks_str">Bookmark of the page to request; null or empty for the first page.</param>
        /// <param name="page">Number of the page being requested.</param>
        /// <returns>Always <c>false</c>.</returns>
        public static bool Get_Tagged_Pins(ref CMS_CrawlerModels model, string search_str, int limit = 1, string bookmarks_str = null, int page = 1)
        {
            if (page > limit)
            {
                return(false);
            }

            // The first page uses the "SearchPage" payload; follow-up pages use "GridItems" plus the bookmark.
            object payload;
            if (string.IsNullOrEmpty(bookmarks_str))
            {
                payload = new
                {
                    options = new
                    {
                        scope = "pins",
                        show_scope_selector = true,
                        query = search_str
                    },
                    context = new
                    {
                        app_version = "aad9791"
                    },
                    module = new
                    {
                        name    = "SearchPage",
                        options = new
                        {
                            scope = "pins",
                            query = search_str
                        }
                    },
                    append         = false,
                    error_strategy = 0
                };
            }
            else
            {
                payload = new
                {
                    options = new
                    {
                        scope = "pins",
                        show_scope_selector = "null",
                        query     = search_str,
                        bookmarks = new string[] { bookmarks_str },
                    },
                    context = new
                    {
                        app_version = "2f83a7e"
                    },
                    module = new
                    {
                        name    = "GridItems",
                        options = new
                        {
                            scope            = "pins",
                            scrollable       = true,
                            show_grid_footer = true,
                            centered         = true,
                            reflow_all       = true,
                            virtualize       = true,
                            item_options     = new
                            {
                                show_pinner      = true,
                                show_pinned_from = false,
                                show_board       = true
                            },
                            layout = "variable_height",
                        }
                    },
                    append         = true,
                    error_strategy = 2
                };
            }

            // Serialize the payload and strip line breaks and tabs from the JSON text.
            string   input        = JsonConvert.SerializeObject(payload);
            string[] pattern      = new string[] { "\n", "\r", "\t" };
            string[] replacements = new string[] { "", "", "" };
            string   data         = Preg_replace(input, pattern, replacements);

            // Both page kinds share the same base URL. The first-page branch used to put a stray space
            // after "term_meta[]=", which ended up inside the query value.
            var urlOrg = Commons.HostApi + search_str + "&rs=typed&term_meta[]=" + search_str + "|typed";

            // data = HttpContext.Current.Server.UrlEncode(data);
            var timestamp = GetTimestamp(DateTime.Now);
            var url       = urlOrg + "&data=" + data + "&_=" + timestamp;
            var bookmarks = "";

            // getDataPinterest writes the next page's bookmark into bookmarks.
            getDataPinterest(url, model, "", ref bookmarks);

            if (!string.IsNullOrEmpty(bookmarks))
            {
                Get_Tagged_Pins(ref model, search_str, limit, bookmarks, ++page);
            }

            return(false);
        }
예제 #13
0
        /// <summary>
        /// Builds a <c>PinFilterDTO</c> from the request parameters (TypeTime, Sort2, TypeQuantity,
        /// listKeywords, FromDate, ToDate, GroupID), loads the matching pins through <c>_fac.GetPin</c>
        /// and renders them with the "_ListItem" partial view.
        /// </summary>
        /// <returns>
        /// The "_ListItem" partial view, or an HTTP 400 result when any exception is thrown while the
        /// request is handled (for example an unparsable TypeQuantity, FromDate or ToDate value).
        /// </returns>
        public ActionResult Search()
        {
            try
            {
                var FilterModel = new PinFilterDTO();
                FilterModel.PageIndex       = Commons.PageIndex;
                FilterModel.PageSize        = Commons.PageSize;
                FilterModel.CreatedDateFrom = null;
                FilterModel.CreatedDateTo   = null;

                var TypeTime = Request["TypeTime"] ?? "2";
                var Sort2    = Request["Sort2"] ?? "2";

                // TypeQuantity stays -1 when the parameter is absent or empty.
                var _TypeQuantity = Request["TypeQuantity"];
                int TypeQuantity  = -1;
                if (!string.IsNullOrEmpty(_TypeQuantity))
                {
                    TypeQuantity = Convert.ToInt16(_TypeQuantity);
                }

                var    Keywords    = Request["listKeywords"];
                char[] separator   = new char[] { ',' };
                var    ListKeyword = CommonHelper.ParseStringToList(Keywords, separator);
                var    _FromDate   = Convert.ToDateTime(Request["FromDate"]);
                var    _ToDate     = Convert.ToDateTime(Request["ToDate"]);
                FilterModel.CreatedAtFrom = _FromDate;
                FilterModel.CreatedAtTo   = _ToDate;

                var _Group = Request["GroupID"] ?? "";
                if (!string.IsNullOrEmpty(_Group))
                {
                    FilterModel.LstGroupID.Add(_Group);

                    var _lstKeywords = getListKeyWordByGroup(_Group);
                    FilterModel.LstKeyWordID.AddRange(_lstKeywords);
                }

                // Map the selected quantity bucket to a pin-count range; the last bucket has no upper bound.
                if (TypeQuantity == (int)Commons.EQuantityType.ZeroToOne)
                {
                    FilterModel.PinCountFrom = 0;
                    FilterModel.PinCountTo   = 100;
                }
                else if (TypeQuantity == (int)Commons.EQuantityType.OneToTwo)
                {
                    FilterModel.PinCountFrom = 100;
                    FilterModel.PinCountTo   = 200;
                }
                else if (TypeQuantity == (int)Commons.EQuantityType.TwoToThree)
                {
                    FilterModel.PinCountFrom = 200;
                    FilterModel.PinCountTo   = 300;
                }
                else if (TypeQuantity == (int)Commons.EQuantityType.ThreeToFour)
                {
                    FilterModel.PinCountFrom = 300;
                    FilterModel.PinCountTo   = 400;
                }
                else if (TypeQuantity == (int)Commons.EQuantityType.FourToFive)
                {
                    FilterModel.PinCountFrom = 400;
                    FilterModel.PinCountTo   = 500;
                }
                else if (TypeQuantity == (int)Commons.EQuantityType.MoreFive)
                {
                    FilterModel.PinCountFrom = 500;
                }

                // Keywords sent with the request replace the keyword list gathered from the group above.
                if (ListKeyword != null && ListKeyword.Count > 0)
                {
                    FilterModel.LstKeyWordID = ListKeyword;
                }

                // Sort1 is filled from TypeTime; a value that does not parse as an int becomes 0.
                FilterModel.TypeTime = TypeTime;
                var tmp = 0;
                int.TryParse(TypeTime, out tmp);
                FilterModel.Sort1 = tmp;
                int.TryParse(Sort2, out tmp);
                FilterModel.Sort2 = tmp;

                var modelCrawler = new CMS_CrawlerModels();
                var _pinModels   = new List <PinsModels>();
                var msg          = "";
                int totalPin     = 0;
                var result       = _fac.GetPin(ref _pinModels, ref totalPin, FilterModel, ref msg);
                if (result)
                {
                    modelCrawler.Pins = _pinModels;
                }
                return(PartialView("_ListItem", modelCrawler));
            }
            catch (Exception ex)
            {
                // The exception used to be discarded; record it so the cause of the 400 below can be diagnosed.
                System.Diagnostics.Trace.TraceError("Search error: " + ex);
            }
            return(new HttpStatusCodeResult(HttpStatusCode.BadRequest));
        }
        public bool CrawlData(string Id, string createdBy, ref string msg)
        {
            NSLog.Logger.Info("CrawlData: " + Id);
            var result = true;

            try
            {
                using (var _db = new CMS_Context())
                {
                    /* get key by ID */
                    var keyWord = _db.CMS_KeyWord.Where(o => o.ID == Id && o.Status == (byte)Commons.EStatus.Active).FirstOrDefault();
                    if (keyWord != null)
                    {
                        /* check time span crawl */
                        var timeSpanCrawl = DateTime.Now - keyWord.UpdatedDate;
                        if (timeSpanCrawl.Value.TotalMinutes > 5 || keyWord.UpdatedDate == keyWord.CreatedDate) /* 5min to crawl data again */
                        {
                            /* update crawer date */
                            var bkTime = keyWord.UpdatedDate;
                            keyWord.UpdatedDate = DateTime.Now;
                            keyWord.UpdatedBy   = createdBy;
                            keyWord.KeyWord     = keyWord.KeyWord.Trim();
                            _db.SaveChanges();

                            /* cookies
                             *  User: [email protected]
                             *  Pass: pitool.org79
                             */
                            if (!string.IsNullOrEmpty(keyWord.CrawlAccountID))
                            {
                                var _Cookie = keyWord.CMS_Account.Cookies;
                                if (!string.IsNullOrEmpty(_Cookie))
                                {
                                    CrawlerBoardHelper._Cookies = _Cookie;
                                }
                                else
                                {
                                    CrawlerHelper._Cookies = "_b = \"AS+B1gn0GdpGgLQl83JubKX1bG19kiuUUvX8lnvITKDHNq2tJcgqXNIQ0cLN+kjq4KM=\"; _pinterest_pfob = enabled; _ga = GA1.2.229901352.1528170174; pnodepath = \"/pin4\"; fba = True; G_ENABLED_IDPS = google; bei = false; logged_out = True; sessionFunnelEventLogged = 1; cm_sub = denied; _auth = 1; csrftoken = fkrSitmDb4vW2kT1G3GfOkcC8mPvl0kV; _pinterest_sess = \"TWc9PSZWaE0xeDZOVm4yL3Yva0VSazRkRjlHR013bk9mdVJBcU9zVEtEOUhXVjhKSFZmZUEreWJiNDYrV3FubVRoVzdqdDF0dmtDcXErcFF6MmlXQUQ3RDVzWERCWTZYZUt4eXMzemkvOGlXRFZQT1J6MjkwampOZlVJUFEvTnNkTUZYMkJ3dGxPTTRKaVIwdGNJY2h1MUhaSHlFT3djd0huNHE0YmtiTTBZR3dVTVB3d0RyYVE4UC8rMjZCYWo2eTJLNGJVSHR6KzRENjlWVE0rNFMxNWdGMUtVL0VtL2RDZktiUFg3M1Y2Z2dEbllPeUxFR3FOdEd6SUJSRTlBMWs1YkJnbTBlWHhwcC9pMmlqRmoydlh0V2VQSGYvYk1zeXlSM2dIU1dmUXIyRWVxWVBPdTYzbHFjcVhYRWRBT0FTQ3VBNmdWMm5QUlREZDdSY2ZQeE1NWklqSUZxNDllVHF1WUVzRFRrRjBXQnZCMVBGTlYxT0UzM1daeHFOUnBBTzliMzFJdmovQ1hQR2Vvc1pkTHNxL1FjT3FrWllTR1d6VHFrd2g5cFBFMmswM3dIa0dOOHVCbGd6aVlKUkJlZlZNeWVyRTBYREcrQVFlUTdRc1NqMlFlQ3RvaWlZMjJXZ1RURmIxNDA2d2JTODRGNk9BYWpoRzVJTUhLMkJ4UDJGb0NmN0NOQXpmZ0FoR08xcElmWmh5S29OeGRadFpDVWR1RGw3ZzZGRS81SlU4UlhSUVlIWm4wRzRJMGFVaTQzdGI3T2ovSCtHR2ZSWlk0M1RCN2JXSmZJRFdQUUpZWVpRMW5ta0pMbXgwT2NZckZJcHg0RTJrTjJlZWJIdXFSdkdJTWNXc2d3NHpXdzFTRGhKVkN4YmY4SCtJaTdSQSt0K2dhc1VDc0tkNnJIeVFhb3BHeDd6OUwvamZsanRKV0ZYNGFmZWFQNGlqNFVqekVFcGUreHU4UGVqZXRuMFVDNE1QbkFuWnJ6YzNjMTF3dVNZUHJ2MjBwMi8xeXNwbnczMlpSa3cvbzVPQUhQSyswNlU4Y2JQaThxNWN1NWtHVm83SWc0YjJVVW1tUWZYcHpWR2RCYS8wRE0yb2RtNUs0NzRteFp4JjVhOXZDbjB5RGtxL1lROE5WOVNDMjB4c1dMND0=\"";
                                }
                            }
                            else
                            {
                                CrawlerHelper._Cookies = "_b = \"AS+B1gn0GdpGgLQl83JubKX1bG19kiuUUvX8lnvITKDHNq2tJcgqXNIQ0cLN+kjq4KM=\"; _pinterest_pfob = enabled; _ga = GA1.2.229901352.1528170174; pnodepath = \"/pin4\"; fba = True; G_ENABLED_IDPS = google; bei = false; logged_out = True; sessionFunnelEventLogged = 1; cm_sub = denied; _auth = 1; csrftoken = fkrSitmDb4vW2kT1G3GfOkcC8mPvl0kV; _pinterest_sess = \"TWc9PSZWaE0xeDZOVm4yL3Yva0VSazRkRjlHR013bk9mdVJBcU9zVEtEOUhXVjhKSFZmZUEreWJiNDYrV3FubVRoVzdqdDF0dmtDcXErcFF6MmlXQUQ3RDVzWERCWTZYZUt4eXMzemkvOGlXRFZQT1J6MjkwampOZlVJUFEvTnNkTUZYMkJ3dGxPTTRKaVIwdGNJY2h1MUhaSHlFT3djd0huNHE0YmtiTTBZR3dVTVB3d0RyYVE4UC8rMjZCYWo2eTJLNGJVSHR6KzRENjlWVE0rNFMxNWdGMUtVL0VtL2RDZktiUFg3M1Y2Z2dEbllPeUxFR3FOdEd6SUJSRTlBMWs1YkJnbTBlWHhwcC9pMmlqRmoydlh0V2VQSGYvYk1zeXlSM2dIU1dmUXIyRWVxWVBPdTYzbHFjcVhYRWRBT0FTQ3VBNmdWMm5QUlREZDdSY2ZQeE1NWklqSUZxNDllVHF1WUVzRFRrRjBXQnZCMVBGTlYxT0UzM1daeHFOUnBBTzliMzFJdmovQ1hQR2Vvc1pkTHNxL1FjT3FrWllTR1d6VHFrd2g5cFBFMmswM3dIa0dOOHVCbGd6aVlKUkJlZlZNeWVyRTBYREcrQVFlUTdRc1NqMlFlQ3RvaWlZMjJXZ1RURmIxNDA2d2JTODRGNk9BYWpoRzVJTUhLMkJ4UDJGb0NmN0NOQXpmZ0FoR08xcElmWmh5S29OeGRadFpDVWR1RGw3ZzZGRS81SlU4UlhSUVlIWm4wRzRJMGFVaTQzdGI3T2ovSCtHR2ZSWlk0M1RCN2JXSmZJRFdQUUpZWVpRMW5ta0pMbXgwT2NZckZJcHg0RTJrTjJlZWJIdXFSdkdJTWNXc2d3NHpXdzFTRGhKVkN4YmY4SCtJaTdSQSt0K2dhc1VDc0tkNnJIeVFhb3BHeDd6OUwvamZsanRKV0ZYNGFmZWFQNGlqNFVqekVFcGUreHU4UGVqZXRuMFVDNE1QbkFuWnJ6YzNjMTF3dVNZUHJ2MjBwMi8xeXNwbnczMlpSa3cvbzVPQUhQSyswNlU4Y2JQaThxNWN1NWtHVm83SWc0YjJVVW1tUWZYcHpWR2RCYS8wRE0yb2RtNUs0NzRteFp4JjVhOXZDbjB5RGtxL1lROE5WOVNDMjB4c1dMND0=\"";
                            }
                            var searchStr = HttpUtility.UrlEncode(keyWord.KeyWord);

                            /* get first class result */
                            var           model = new CMS_CrawlerModels();
                            CMSPinFactory _fac  = new CMSPinFactory();
                            CrawlerHelper.Get_Tagged_Pins(ref model, searchStr, Commons.PinDefault);
                            if (model != null && model.Pins != null && model.Pins.Any())
                            {
                                /* get second class result */
                                var listPinID = model.Pins.Select(o => o.ID).ToList();
                                Parallel.ForEach(listPinID, pinID =>
                                {
                                    CrawlerHelper.Get_Tagged_OrtherPins(ref model, searchStr, Commons.PinOrtherDefault, "", 1, pinID);
                                });
                            }

                            /* create or update pin */
                            var res = _fac.CreateOrUpdate(model.Pins, keyWord.ID, createdBy, ref msg);

                            if (res == false)
                            {
                                /* back to last crawl data */
                                //keyWord.UpdatedDate = bkTime;
                                //_db.SaveChanges();
                                result = false;
                            }
                            else
                            {
                                keyWord.UpdatedDate = DateTime.Now;
                                _db.SaveChanges();
                            }
                        }
                    }
                }

                NSLog.Logger.Info("ResponseCrawlData: " + Id, result);
            }
            catch (Exception ex)
            {
                msg    = "Crawl data is unsuccessfully.";
                result = false;
                LogHelper.WriteLogs("ErrorCrawlData: " + Id, JsonConvert.SerializeObject(ex));
                NSLog.Logger.Error("ErrorCrawlData: " + Id, ex);
            }

            return(result);
        }
예제 #15
0
        /// <summary>
        /// Downloads <paramref name="url"/> with a hard-coded cookie and user agent, looks for a
        /// "payload" element in the first two &lt;script&gt; blocks of the response, and turns every
        /// <c>&lt;a rel="theater"&gt;</c> anchor found in the payload markup into a pin on
        /// <paramref name="model"/>.
        /// </summary>
        /// <param name="url">Absolute URL to request.</param>
        /// <param name="model">Crawler model whose <c>Pins</c> collection receives the results.</param>
        /// <param name="pinId">Not used by this method.</param>
        /// <param name="bookmarks">Only written to the error log; never modified here.</param>
        /// <returns>The same <paramref name="model"/> instance. Failures are logged, not thrown.</returns>
        public static CMS_CrawlerModels getDataPinterest(string url, CMS_CrawlerModels model, string pinId, ref string bookmarks)
        {
            try
            {
                var httpWebRequest = (HttpWebRequest)WebRequest.Create(new Uri(url));

                /* request need cookie & user agent */
                // NOTE(review): a session cookie is hard-coded here; consider moving it to configuration.
                httpWebRequest.Headers["Cookie"] = "fr=0HZLfh0cIOmtmNqCq.AWXR_MW9yNog0CyLSTuvJhhdnGM.BajqQf.RE.AAA.0.0.BbOi7n.AWUh9bO8; sb=Cs05WwnlYymkzEg6Xn32mzc8; wd=1366x654; datr=Js05W_jbAaa1Ij5CurtBJmwC; locale=en_GB; c_user=100003324695675; xs=23%3AVia9gvMSQtiufw%3A2%3A1530514908%3A467%3A6165; pl=n; spin=r.4066324_b.trunk_t.1530514908_s.1_v.2_; act=1530541156947%2F6; presence=EDvF3EtimeF1530541148EuserFA21B03324695675A2EstateFDutF1530541148851CEchFDp_5f1B03324695675F4CC; x-src=%2Fpg%2Flifewithsunshine%2Fads%2F%7Ccontent_container; pnl_data2=eyJhIjoib25hZnRlcmxvYWQiLCJjIjoiWFBhZ2VzUHJvZmlsZUhvbWVDb250cm9sbGVyIiwiYiI6ZmFsc2UsImQiOiIvbGlmZXdpdGhzdW5zaGluZS9hZHMvIiwiZSI6W119";
                httpWebRequest.UserAgent         = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0";
                httpWebRequest.Timeout           = 100000;

                // Read the whole body and release the response/connection before any parsing or
                // follow-up crawling happens.
                string answer;
                using (var httpResponse = (HttpWebResponse)httpWebRequest.GetResponse())
                using (var streamReader = new StreamReader(httpResponse.GetResponseStream()))
                {
                    answer = streamReader.ReadToEnd();
                }

                var htmlDoc = new HtmlDocument();
                htmlDoc.LoadHtml(answer);

                var jsonSerializer = new JavaScriptSerializer();

                /* only the first two scripts are inspected (the original note says the 3rd one errors) */
                foreach (var script in htmlDoc.DocumentNode.Descendants("script").Take(2))
                {
                    /* find pay load element */
                    var res = findElement(script.InnerHtml, "payload", 0);
                    if (string.IsNullOrEmpty(res))
                    {
                        continue;
                    }

                    // payload -> "content" -> "content" holds the markup; a missing level skips the
                    // script instead of throwing KeyNotFoundException and aborting the whole crawl.
                    var payload = jsonSerializer.Deserialize<object>(res) as IDictionary<string, object>;
                    object outerContent;
                    if (payload == null || !payload.TryGetValue("content", out outerContent))
                    {
                        continue;
                    }

                    var contentMap = outerContent as IDictionary<string, object>;
                    object innerContent;
                    if (contentMap == null || !contentMap.TryGetValue("content", out innerContent))
                    {
                        continue;
                    }

                    var markup = innerContent as string;
                    if (string.IsNullOrEmpty(markup))
                    {
                        continue;
                    }

                    /* get list tag a */
                    // Parse the fragment in its own document: reloading htmlDoc here would replace the
                    // document the remaining <script> nodes belong to while they are still being read.
                    var fragmentDoc = new HtmlDocument();
                    fragmentDoc.LoadHtml(markup);
                    var lstA = fragmentDoc.DocumentNode.Descendants("a")
                               .Where(n => n.GetAttributeValue("rel", "") == "theater")
                               .ToList();

                    foreach (var tagA in lstA)
                    {
                        /* GET DATA MODEL */
                        var href    = tagA.GetAttributeValue("href", "");
                        var ajaxify = tagA.GetAttributeValue("ajaxify", "");
                        var fbID    = findID(ajaxify);
                        var pin     = new PinsModels()
                        {
                            ID   = fbID,
                            Link = href,
                        };
                        model.Pins.Add(pin);

                        CrawlerFBDetail(href, fbID, ref pin);
                    }
                }
            }
            catch (Exception ex)
            {
                NSLog.Logger.Error("ErrorgetDataPinterest" + "\n url: " + url + "\nBookmarks:" + bookmarks, ex);
            }
            return(model);
        }
예제 #16
0
        /// <summary>
        /// Parses Facebook page HTML and appends every post it can extract to
        /// <paramref name="pins1"/>. Posts are the &lt;div&gt; elements whose class contains "_643h".
        /// For each post the description, owner name, post ids and images are read; both plain image
        /// anchors and "dynamic" &lt;li class="_5ya..."&gt; entries produce pins.
        /// </summary>
        /// <param name="strHtml">HTML to parse; null or empty does nothing.</param>
        /// <param name="IsNextPage">When false, the page id is read from the first post's
        /// <c>data-report-meta</c> attribute into <paramref name="_pageId"/>.</param>
        /// <param name="pins1">Model whose <c>Pins</c> list receives the extracted pins.</param>
        /// <param name="_pageId">Receives the "landing_page_id" value when it is present.</param>
        /// <remarks>Exceptions are logged and swallowed; pins are copied to <paramref name="pins1"/>
        /// only if parsing completes.</remarks>
        public static void CrawlerDataFacebook(string strHtml, bool IsNextPage, ref CMS_CrawlerModels pins1, ref string _pageId)
        {
            try
            {
                if (string.IsNullOrEmpty(strHtml))
                {
                    return;
                }

                CMS_CrawlerModels pins = new CMS_CrawlerModels();
                var htmlDoc            = new HtmlDocument();
                htmlDoc.LoadHtml(strHtml);

                // Post containers. The same query serves both the page-id lookup and the post loop.
                List<HtmlNode> nodeHtml = htmlDoc.DocumentNode.Descendants().Where
                                              (x => (x.Name == "div" && x.Attributes["class"] != null &&
                                                     x.Attributes["class"].Value.Contains("_643h"))).ToList();

                //find page id of fan page
                if (!IsNextPage && nodeHtml.Count > 0)
                {
                    var _643h = nodeHtml[0].GetAttributeValue("data-report-meta", "");
                    if (!string.IsNullOrEmpty(_643h))
                    {
                        JObject o = JObject.Parse(System.Web.HttpUtility.HtmlDecode(_643h));
                        if (o != null)
                        {
                            // The token may be absent; leave _pageId untouched in that case.
                            var pageIdToken = o.SelectToken("landing_page_id");
                            if (pageIdToken != null)
                            {
                                _pageId = pageIdToken.ToString();
                            }
                        }
                    }
                }

                // Declared outside the loop on purpose: as in the original, a post without its own
                // description/owner keeps the values found for the previous post.
                var description = "";
                var OwnerName   = "";

                foreach (var itemHtml in nodeHtml)
                {
                    var _node = itemHtml.Descendants("div")
                                .Where(x => !x.InnerText.Equals("report") &&
                                       x.InnerHtml.Contains("_5pbx userContent _3576") &&
                                       x.InnerHtml.Contains("_6a _5u5j _6b") &&
                                       x.InnerHtml.Contains("_5pcp _5lel _2jyu _232_") &&
                                       x.InnerHtml.Contains("mtm")).ToList();
                    if (_node.Count == 0)
                    {
                        continue;
                    }

                    var fb_ids = new List<string>();
                    var item   = _node[0];
                    var _Html  = item.InnerHtml;
                    if (string.IsNullOrEmpty(_Html))
                    {
                        continue;
                    }

                    var _Doc = new HtmlDocument();
                    _Doc.LoadHtml(_Html);

                    // Description
                    var _des = _Doc.DocumentNode.Descendants().Where(x => x.Attributes["class"] != null && x.Attributes["class"].Value.Contains("_5pbx userContent _3576")).ToList();
                    if (_des.Count > 0)
                    {
                        description = _des[0].InnerText;
                        if (!string.IsNullOrEmpty(description))
                        {
                            description = description.Replace("&quot;", "");
                        }
                    }

                    // Owner name
                    var _ownerName = _Doc.DocumentNode.Descendants().Where(x => x.Attributes["class"] != null && x.Attributes["class"].Value.Contains("_6a _5u5j _6b")).ToList();
                    foreach (var itemOwner in _ownerName)
                    {
                        var NodeName = itemOwner.Descendants("a").ToList();
                        if (NodeName.Count == 0)
                        {
                            // ToList() is never null but can be empty; try the next candidate
                            // instead of indexing [0] and throwing.
                            continue;
                        }

                        OwnerName = NodeName[0].InnerText;
                        if (!string.IsNullOrEmpty(OwnerName))
                        {
                            OwnerName = OwnerName.Replace("&quot;", "");
                        }
                        else
                        {
                            // NOTE(review): this flag is set on the local result object only; just
                            // Pins are copied to pins1 below, so the caller never sees it.
                            pins.ErrorStatus = (byte)Commons.EErrorStatus.AccBlocked;
                        }
                        break;
                    }

                    // fb_id
                    var _FbId = _Doc.DocumentNode.Descendants().Where(x => x.Attributes["class"] != null && x.Attributes["class"].Value.Contains("_5pcp _5lel _2jyu _232_")).ToList();
                    foreach (var itemFbId in _FbId)
                    {
                        var strfb_id = itemFbId.Id;
                        if (string.IsNullOrEmpty(strfb_id))
                        {
                            continue;
                        }

                        var charecter = "";
                        var fb_id     = findFbId_v3(strfb_id, "subtitle_", "_", ref charecter);
                        if (!string.IsNullOrEmpty(fb_id))
                        {
                            fb_ids.Add(fb_id);
                        }
                        if (!string.IsNullOrEmpty(charecter) && charecter.Equals(";"))
                        {
                            fb_id = findFbId_v3(strfb_id, ";", ";", ref charecter);
                            if (!string.IsNullOrEmpty(fb_id))
                            {
                                fb_ids.Add(fb_id);
                            }
                        }
                        break; // only the first element carrying an id is used
                    }

                    //  Image
                    var _Image = _Doc.DocumentNode.Descendants().Where(x => x.Attributes["class"] != null && x.Attributes["class"].Value.Contains("mtm")).ToList();
                    foreach (var itemImg in _Image)
                    {
                        // post normal
                        // NOTE(review): anchors are taken from the whole post (item), not from itemImg,
                        // so a post with several "mtm" blocks yields the same pins once per block -
                        // confirm this is intended.
                        foreach (var itemImage in item.Descendants("a").ToList())
                        {
                            var fb_id      = new List<string>();
                            var _image     = itemImage.GetAttributeValue("data-ploi", "");
                            var _apiDetail = itemImage.GetAttributeValue("href", "");
                            if (!string.IsNullOrEmpty(_image))
                            {
                                _image = _image.Replace("amp;", "");
                            }

                            if (string.IsNullOrEmpty(_image) || string.IsNullOrEmpty(_apiDetail))
                            {
                                continue;
                            }

                            var Pin    = new PinsModels();
                            var Splits = _apiDetail.Split('/').ToList();
                            if (Splits.Count >= 5)
                            {
                                fb_id.Add(Splits[4]);
                            }
                            if (fb_ids.Count > 0)
                            {
                                fb_id.AddRange(fb_ids);
                            }
                            //CrawlerFBDetail(_apiDetail, fb_id, ref Pin);
                            Pin.LinkApi     = "https://www.facebook.com" + _apiDetail;
                            Pin.ImageURL    = _image;
                            Pin.OwnerName   = OwnerName;
                            Pin.Description = description;
                            Pin.FbIds       = fb_id;
                            pins.Pins.Add(Pin);
                        }

                        //post dynamic
                        var nodeChildDynamic = itemImg.Descendants("ul").ToList();
                        if (nodeChildDynamic.Count == 0)
                        {
                            continue;
                        }

                        var _doc = new HtmlDocument();
                        _doc.LoadHtml(nodeChildDynamic[0].InnerHtml);
                        var nodeLI = _doc.DocumentNode.Descendants().Where(
                            x => (x.Name == "li" && x.Attributes["class"] != null && x.Attributes["class"].Value.Contains("_5ya"))).ToList();

                        // The post id is appended to each dynamic pin id. It may be missing, in which
                        // case the image-derived id is used on its own rather than indexing an empty list.
                        var idSuffix = fb_ids.Count > 0 ? "_" + fb_ids[0] : "";

                        // Sequential on purpose: Pins is used as a list (AddRange below), which is not
                        // safe for concurrent Add calls, and the per-item work is small.
                        foreach (var itemLI in nodeLI)
                        {
                            var Pin         = new PinsModels();
                            var nodeLIImage = itemLI.Descendants("img").ToList();
                            if (nodeLIImage.Count > 0)
                            {
                                var _image = nodeLIImage[0].GetAttributeValue("src", "");
                                if (!string.IsNullOrEmpty(_image))
                                {
                                    _image       = _image.Replace("amp;", "");
                                    Pin.ImageURL = _image;

                                    // Prefer the id from findFbOh, then findFbHash, else a random id.
                                    var PinId = findFbOh(_image);
                                    if (string.IsNullOrEmpty(PinId))
                                    {
                                        PinId = findFbHash(_image);
                                    }
                                    Pin.ID = !string.IsNullOrEmpty(PinId)
                                        ? PinId + idSuffix
                                        : Guid.NewGuid().ToString();
                                }
                            }

                            var nodeLink = itemLI.Descendants("a").ToList();
                            if (nodeLink.Count > 0)
                            {
                                Pin.Link = nodeLink[0].GetAttributeValue("href", "");
                            }

                            //description
                            // First <div> whose last child is a text node; childless divs are skipped.
                            var _description = itemLI.Descendants("div")
                                               .FirstOrDefault(x => x.LastChild != null && x.LastChild.Name.Equals("#text"));
                            if (_description != null)
                            {
                                Pin.Description = _description.InnerText;
                            }

                            Pin.OwnerName = OwnerName;
                            Pin.IsDynamic = true;
                            if (!string.IsNullOrEmpty(Pin.ID))
                            {
                                pins.Pins.Add(Pin);
                            }
                        }
                    }
                }

                if (pins.Pins != null && pins.Pins.Any())
                {
                    pins1.Pins.AddRange(pins.Pins);
                }
            }
            catch (Exception ex)
            {
                // Keep the best-effort contract (never throw to the caller) but leave a trace.
                NSLog.Logger.Error("ErrorCrawlerDataFacebook", ex);
            }
        }
예제 #17
0
        /// <summary>
        /// Downloads <paramref name="url"/> with the given cookie and feeds the HTML to
        /// <c>CrawlerDataFacebook</c>. Retryable failures (I/O errors, HTTP 404, and web errors
        /// without a protocol response) are retried after a 500 ms pause while
        /// <paramref name="countExp"/> is at most 5.
        /// </summary>
        /// <param name="url">Absolute URL to fetch.</param>
        /// <param name="cookie">Value sent as the Cookie request header.</param>
        /// <param name="pins">Model that receives the parsed pins.</param>
        /// <param name="countExp">Retry counter shared with the caller; incremented on every retry.</param>
        /// <param name="_pageId">Receives the page id when the parser finds one.</param>
        public static void CrawlerFb(string url, string cookie, ref CMS_CrawlerModels pins, ref int countExp, ref string _pageId)
        {
            // Each attempt fully releases its request/response before the next one starts.
            while (CrawlFbOnce(url, cookie, ref pins, ref _pageId))
            {
                Thread.Sleep(500);
                if (countExp > 5)
                {
                    return; // retry budget exhausted
                }
                countExp = countExp + 1;
            }
        }

        /// <summary>
        /// Performs a single download-and-parse attempt for <c>CrawlerFb</c>.
        /// </summary>
        /// <returns><c>true</c> when the attempt failed in a way the caller should retry.</returns>
        private static bool CrawlFbOnce(string url, string cookie, ref CMS_CrawlerModels pins, ref string _pageId)
        {
            // The proxy is looked up but currently not applied (assignment below is disabled).
            int    _port          = 0;
            string _proxy         = CommonHelper.RamdomProxy(ref _port);
            var    httpWebRequest = (HttpWebRequest)WebRequest.Create(new Uri(url));

            //httpWebRequest.Proxy = new WebProxy(_proxy, _port);
            httpWebRequest.KeepAlive = false;
            /* request need cookie & user agent */
            httpWebRequest.Headers["Cookie"] = cookie;
            httpWebRequest.UserAgent         = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0";
            httpWebRequest.Accept            = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
            httpWebRequest.Timeout           = 9000000;

            try
            {
                using (HttpWebResponse httpResponse = (HttpWebResponse)httpWebRequest.GetResponse())
                {
                    try
                    {
                        if (httpResponse.StatusCode == HttpStatusCode.OK)
                        {
                            using (var streamReader = new StreamReader(httpResponse.GetResponseStream()))
                            {
                                var html = streamReader.ReadToEnd();
                                CrawlerDataFacebook(html, false, ref pins, ref _pageId);
                            }
                        }
                        return false;
                    }
                    catch (IOException exIO)
                    {
                        NSLog.Logger.Info("crawl error io exception" + url + " ", exIO.Message);
                        return true;
                    }
                    catch (Exception ex)
                    {
                        LogHelper.WriteLogs("ErrorCrawlerFB: " + url, JsonConvert.SerializeObject(ex));
                        NSLog.Logger.Error("Crawler Fb: " + url, ex);
                        return httpResponse.StatusCode == HttpStatusCode.NotFound;
                    }
                }
            }
            catch (WebException ex)
            {
                NSLog.Logger.Info("Crawl error : " + url + ": ", ex.Message);

                // The error response owns a connection too; dispose it once inspected.
                using (WebResponse errorResponse = ex.Response)
                {
                    if (ex.Status == WebExceptionStatus.ProtocolError && errorResponse != null)
                    {
                        // A protocol-level answer is retried only for 404.
                        return ((HttpWebResponse)errorResponse).StatusCode == HttpStatusCode.NotFound;
                    }
                }
                return true;
            }
            catch (IOException exIO)
            {
                NSLog.Logger.Info("crawl error io exception" + url + " ", exIO.Message);
                return true;
            }
            catch (Exception ex)
            {
                NSLog.Logger.Error("crawl error :", ex);
                return false;
            }
        }
예제 #18
0
        /// <summary>
        /// Requests home-feed pages one after another through <c>getDataPinterestHome</c>, starting
        /// at <paramref name="page"/> and stopping once <paramref name="limit"/> is passed or no
        /// follow-up bookmark comes back.
        /// </summary>
        /// <param name="model">Model handed to <c>getDataPinterestHome</c> for every page.</param>
        /// <param name="limit">Highest page number that is still requested.</param>
        /// <param name="bookmarks_str">Bookmark of the page to fetch; null or empty requests the first page.</param>
        /// <param name="page">Number of the page this call starts with.</param>
        /// <returns>Always <c>false</c>.</returns>
        public static bool Get_Tagged_HomePins(ref CMS_CrawlerModels model, int limit = 1, string bookmarks_str = null, int page = 1)
        {
            for (; page <= limit; ++page)
            {
                // The request options differ only by the "bookmarks" entry sent for follow-up pages.
                string json;
                if (string.IsNullOrEmpty(bookmarks_str))
                {
                    json = JsonConvert.SerializeObject(new
                    {
                        options = new
                        {
                            field_set_key         = "hf_grid",
                            in_nux                = false,
                            is_react              = true,
                            prependPartner        = false,
                            prependUserNews       = false,
                            repeatRequestBookmark = "",
                            static_feed           = false
                        },
                        context = new { },
                    });
                }
                else
                {
                    json = JsonConvert.SerializeObject(new
                    {
                        options = new
                        {
                            bookmarks             = new string[] { bookmarks_str },
                            field_set_key         = "hf_grid",
                            in_nux                = false,
                            is_react              = true,
                            prependPartner        = false,
                            prependUserNews       = false,
                            repeatRequestBookmark = "",
                            static_feed           = false
                        },
                        context = new { },
                    });
                }

                // Strip line breaks and tabs from the serialized options.
                var payload = Preg_replace(json, new string[] { "\n", "\r", "\t" }, new string[] { "", "", "" });

                // data = HttpContext.Current.Server.UrlEncode(data);
                var stamp      = GetTimestamp(DateTime.Now);
                var requestUrl = Commons.HostApiHomePin + "&data=" + payload + "&_=" + stamp;

                var nextBookmark = "";
                getDataPinterestHome(requestUrl, model, "", ref nextBookmark);

                if (string.IsNullOrEmpty(nextBookmark))
                {
                    break; // no further page advertised
                }
                bookmarks_str = nextBookmark;
            }

            return(false);
        }
예제 #19
0
        /// <summary>
        /// Sends a GET request to <paramref name="url"/>, deserializes the JSON reply and appends
        /// every complete pin found under <c>resource_response.data</c> to <c>model.Pins</c>.
        /// </summary>
        /// <param name="url">Absolute URL of the resource endpoint to request.</param>
        /// <param name="model">Crawler model whose <c>Pins</c> list receives the parsed pins.</param>
        /// <param name="pinId">Not read by this method; kept so existing callers keep compiling.</param>
        /// <param name="bookmarks">
        /// Overwritten with the first entry of <c>resource.options.bookmarks</c> from the reply when
        /// that node is present; left untouched otherwise (including when the request fails).
        /// </param>
        /// <returns>The same <paramref name="model"/> instance that was passed in.</returns>
        /// <remarks>
        /// Best-effort: any exception (network, JSON shape, date parsing, ...) stops processing,
        /// is written to the trace output and the model is returned with the pins collected so far.
        /// A pin is only added when the keys "domain", "id", "link", "created_at" and "images" are
        /// all present on the item.
        /// </remarks>
        public static CMS_CrawlerModels getDataPinterestHome(string url, CMS_CrawlerModels model, string pinId, ref string bookmarks)
        {
            try
            {
                var httpWebRequest = (HttpWebRequest)WebRequest.Create(new Uri(url));
                httpWebRequest.Headers["X-Requested-With"] = "XMLHttpRequest";
                /* NOTE(review): hard-coded session cookie committed to source. It should come from configuration
                 * or the account store; the value is kept byte-for-byte here so behavior does not change. */
                httpWebRequest.Headers["Cookie"]           = "_auth=1; csrftoken=dMWi2a6L1DTFUHmyqem0oGrDmteiaETw; _pinterest_sess=\"TWc9PSZsSlA1dUF4QWlRWGRYVGR6Qm9mN3pwczUyUDk4ZDYvckduSjl4N3ZSRHlsU1VmWkhBTUsrMU9KNkxjS3pyUk1zREdDL2Rmb2VuT1dwRDhSTmxTOE1Ja0FjOUtreTJVc0o0SmthQ2xhN3lRa3BQVnRMcUF5dlN0Z255Syt4am56VnQvYVQwT0JyejBCSlk4YzFyQ0pEekZwNSs0YjZnMTBseEIvRkU0Um1XeWthZ1cvNGxpdDVyTEdrSHRzWFVLN244T25TaGVoYy93TGVSRjVxNzl5dnlZV1A5L3NlNnc5MWE4djl0ZjNoeEhqTTNuaGduRnZ2VkF1RTd6V1V3VnBCT3cyMksxMHJIdVE0TVVjc3FmWVozVllzekhpNFRGNDFBTERIVzdkcUNUS3NlWEJFdE1mSXJBbnNPVStHQXJiUWJRSENyVVVKTVJYNit5MkZTMFVNN3ptY09FNmFoaHk3Nk9MdUtuRmdDSWRWRVhPTWYrSXA4dFhlRU1hYW5paFNQMU5OcFNwY2xSZlJHZVlWWU03eHFsNWVmSWRHL0ZtN3NhdU9ubzhpUjZqMzNTTUxwMTlOQWRGa29zVUc1UXFqZ1BUYzhHL3M0YndDY2ZBN2ZMZnJQZTlGbXdPWjg5SXJVOEpUMEtPVnMzcjZPcytOVHRFUnlRUnoyNmJZdjl0YXJlOVp1WGQvM29SSi9xWUwvYmFPcDl5VFl1aEw2ZFBtMHlhZ0g4MXlIMXp1dnFXWWY1VytmY0ZPc0FSMzhqYXdhNTBqQjlYRHJ6OE9CY1ViMmljZkFhQkVydGxyVUtlNis4cnh3R3NPbXVTVjZCZUNTR1NKQ3JpWFJsajBsSEFGcytOMnptN2R2S1BXN1NocTFtZVlKMzF0Y1hyQXNseG9DdzdrQklxNnZXMkk2dXQ4azJJOTR4YWlIUDMvVzAwcmQ0SDVqNnhYc3NlTTNpK0ZHUU9xaUpCOER0N1pQaWFFTUhLRGxpdk1EVDlOYi9DdmRLcTQvdUROekpjRXNJSjVtcEl1bWVLUHhRdTVQQk91L1RWS0w0YkkzZDNwaW5mRnJFakRsck9aNTRBUXVsVFdFWVlTRHJ5OUxBWHdMa0V4Jk1FSHZIUWlQUlE2Q05OZWJydEZrV25SQ2tmND0 = \"; G_ENABLED_IDPS=google; _b=\"ATWTNNfXaINNj5j6VvA6 + rquchpAz7VF + IS8VabE7fJo7ragqOV82ASwCOgxcnxHC5k = \"; pnodepath=\" / 4\"; _ga=GA1.2.1908176321.1528170001; fba=True; cm_sub=none; sessionFunnelEventLogged=1; bei=false";
                httpWebRequest.Timeout = 100000;

                /* Dispose the response as well as the reader so the underlying connection is released. */
                using (var httpResponse = (HttpWebResponse)httpWebRequest.GetResponse())
                using (var streamReader = new StreamReader(httpResponse.GetResponseStream()))
                {
                    var     answer = streamReader.ReadToEnd();
                    dynamic dobj   = new JavaScriptSerializer().Deserialize <dynamic>(answer);
                    if (dobj == null)
                    {
                        return(model);
                    }

                    var resourceResponse = dobj["resource_response"];
                    if (resourceResponse == null)
                    {
                        return(model);
                    }

                    var data = resourceResponse["data"];
                    if (data != null)
                    {
                        /* loop-invariant: one culture instance is enough for every created_at value */
                        var culture = new CultureInfo("en-US", true);

                        foreach (var item in data)
                        {
                            var itemPin = item as Dictionary <string, dynamic>;
                            if (itemPin == null)
                            {
                                /* not a JSON object: skip it instead of aborting the whole page */
                                continue;
                            }

                            var     pin      = new PinsModels();
                            bool    complete = true; /* stays true only if every required key is present */
                            dynamic value;

                            if (itemPin.TryGetValue("domain", out value))
                            {
                                pin.Domain = value;
                            }
                            else
                            {
                                complete = false;
                            }

                            if (itemPin.TryGetValue("id", out value))
                            {
                                pin.ID = value;
                            }
                            else
                            {
                                complete = false;
                            }

                            if (itemPin.TryGetValue("link", out value))
                            {
                                pin.Link = value;
                            }
                            else
                            {
                                complete = false;
                            }

                            if (itemPin.TryGetValue("created_at", out value))
                            {
                                pin.Created_At = DateTime.Parse(value, culture);
                            }
                            else
                            {
                                complete = false;
                            }

                            if (itemPin.TryGetValue("images", out value))
                            {
                                /* a present but non-object "images" value leaves the pin without images, still complete */
                                var images = value as Dictionary <string, dynamic>;
                                if (images != null)
                                {
                                    foreach (var itemImg in images)
                                    {
                                        var image = itemImg.Value;
                                        pin.Images.Add(new ImageModels()
                                        {
                                            url    = image["url"],
                                            height = Convert.ToInt16(image["height"]),
                                            width  = Convert.ToInt16(image["width"])
                                        });
                                    }
                                }
                            }
                            else
                            {
                                complete = false;
                            }

                            if (complete)
                            {
                                model.Pins.Add(pin);
                            }
                        }
                    }

                    /* paging token for the next request */
                    var dataBookmark = dobj["resource"]["options"];
                    if (dataBookmark != null)
                    {
                        bookmarks = dataBookmark["bookmarks"][0];
                    }
                }
            }
            catch (Exception ex)
            {
                /* keep the best-effort contract (never throw to the caller) but leave a trace of the failure */
                System.Diagnostics.Trace.TraceError("getDataPinterestHome failed: " + ex);
            }
            return(model);
        }
예제 #20
0
        /// <summary>
        /// Looks up the keyword with the given ID and, unless it was crawled within the last five
        /// minutes, runs the photo-tab crawl through <c>CrawlerFBToolHelpers</c>, keeps the pins
        /// created within the last seven days and hands them to <c>CMSPinFactory.CreateOrUpdate</c>.
        /// </summary>
        /// <param name="Id">ID of the <c>CMS_KeyWord</c> row to crawl.</param>
        /// <param name="createdBy">User recorded as <c>UpdatedBy</c> and passed on to <c>CreateOrUpdate</c>.</param>
        /// <param name="msg">
        /// Passed by reference to <c>CreateOrUpdate</c>; set to "Crawl data is unsuccessfully." when
        /// an exception is caught.
        /// </param>
        /// <returns>
        /// <c>true</c> when the keyword does not exist, when it was crawled less than five minutes
        /// ago, or when <c>CreateOrUpdate</c> succeeds; <c>false</c> when the crawl produced no pins,
        /// when <c>CreateOrUpdate</c> fails, or when an exception is caught.
        /// </returns>
        public bool CrawlData(string Id, string createdBy, ref string msg)
        {
            NSLog.Logger.Info("CrawlData: " + Id);
            var      model    = new CMS_CrawlerModels();
            var      sequence = 0;
            var      key      = "";
            var      _cookie  = "";
            DateTime lastdate = DateTime.Now.AddDays(-7);
            DateTime datenow  = DateTime.Now;

            var result = true;

            try
            {
                using (var _db = new CMS_Context())
                {
                    /* get key by ID */
                    var keyWord = _db.CMS_KeyWord.FirstOrDefault(o => o.ID == Id);
                    if (keyWord != null)
                    {
                        sequence = keyWord.Sequence;
                        key      = keyWord.KeyWord;
                        /* check time span crawl */
                        var timeSpanCrawl = DateTime.Now - keyWord.UpdatedDate;
                        /* A missing UpdatedDate makes the difference null; treat that as "never crawled"
                         * instead of throwing on .Value, in line with the UpdatedDate == CreatedDate case. */
                        if (!timeSpanCrawl.HasValue || timeSpanCrawl.Value.TotalMinutes > 5 || keyWord.UpdatedDate == keyWord.CreatedDate) /* 5min to crawl data again */
                        {
                            /* update crawler date up front so the 5-minute throttle applies from now on */
                            keyWord.UpdatedDate = DateTime.Now;
                            keyWord.UpdatedBy   = createdBy;
                            _db.SaveChanges();

                            /* call crawler api to crawl data */
                            CMSPinFactory _fac = new CMSPinFactory();

                            var listAcc    = _db.CMS_Account.Where(o => o.Status == (byte)Commons.EStatus.Active && o.IsActive && !string.IsNullOrEmpty(o.Cookies)).ToList();
                            var listCookie = listAcc.Select(x => x.Cookies).ToList();
                            _cookie = CommonHelper.RamdomCookie(listCookie);

                            /* crawler tab post (crawl call currently disabled, so modelPost stays as constructed) */
                            var    PageSize  = Convert.ToInt32(Commons.PageSize);
                            var    modelPost = new CMS_CrawlerModels();
                            string q         = "keywords_search(" + keyWord.KeyWord.Replace(" ", "+") + ")";
                            string ref_path  = "/search/str/" + keyWord.KeyWord + "/stories-keyword/stories-public";
                            //CrawlerFBToolHelpers.CrawlerNow(q, ref_path, "list", (byte)Commons.EType.Post, _cookie, PageSize, ref modelPost);
                            NSLog.Logger.Info("done crawler tab post : ", modelPost.Pins.Count);
                            if (modelPost.Pins != null && modelPost.Pins.Any())
                            {
                                model.Pins.AddRange(modelPost.Pins);
                            }

                            /* crawler tab people (crawl call currently disabled, so modelPeople stays as constructed) */
                            var modelPeople = new CMS_CrawlerModels();
                            q        = "stories-opinion(stories-keyword(" + keyWord.KeyWord + "))";
                            ref_path = "/search/str/" + keyWord.KeyWord + "/stories-keyword/stories-opinion";
                            //CrawlerFBToolHelpers.CrawlerNow(q, ref_path, "list", (byte)Commons.EType.People, _cookie, PageSize, ref modelPeople);
                            NSLog.Logger.Info("done crawler tab people : ", modelPeople.Pins.Count);
                            if (modelPeople.Pins != null && modelPeople.Pins.Any())
                            {
                                model.Pins.AddRange(modelPeople.Pins);
                            }

                            /* crawler tab photo */
                            var modelPhoto = new CMS_CrawlerModels();
                            q        = "photos-keyword(" + keyWord.KeyWord.Replace(" ", "+") + ")";
                            ref_path = "/search/str/" + keyWord.KeyWord.Replace(" ", "+") + "/photos-keyword";
                            CrawlerFBToolHelpers.CrawlerNow(q, ref_path, "grid", (byte)Commons.EType.Photo, _cookie, 70, ref modelPhoto);

                            /* crawler detail tab photo, at most 10 detail requests at a time */
                            var options = new ParallelOptions {
                                MaxDegreeOfParallelism = 10
                            };
                            /* NOTE(review): `ref pin` refers to the lambda parameter; if CrawlerDetail assigns a new
                             * instance instead of mutating the one it receives, the list element is not replaced - confirm. */
                            Parallel.ForEach(modelPhoto.Pins, options, pin =>
                            {
                                CrawlerFBToolHelpers.CrawlerDetail(pin.PhotoID, _cookie, (byte)Commons.EType.Photo, ref pin);
                            });
                            NSLog.Logger.Info("done crawler tab photo : ", modelPhoto.Pins.Count);
                            if (modelPhoto.Pins != null && modelPhoto.Pins.Any())
                            {
                                model.Pins.AddRange(modelPhoto.Pins);
                            }

                            var res = false;
                            if (model.Pins.Count > 0)
                            {
                                NSLog.Logger.Info("done crawler before 7 days ago : ", model.Pins.Count);
                                /* check 7 days ago */
                                model.Pins = model.Pins.Where(o => o.Created_At >= lastdate && o.Created_At <= datenow).ToList();
                                NSLog.Logger.Info("done crawler after 7 days ago : ", model.Pins.Count);

                                /* photo pins already had their detail fetched above; only the other types are fetched here */
                                Parallel.ForEach(model.Pins, options, pin =>
                                {
                                    if (pin.Type != (byte)Commons.EType.Photo)
                                    {
                                        CrawlerFBToolHelpers.CrawlerDetail(pin.PhotoID, _cookie, (byte)Commons.EType.Post, ref pin);
                                    }
                                });

                                res = _fac.CreateOrUpdate(model.Pins, keyWord.ID, createdBy, keyWord.KeyWord, ref msg);
                            }

                            if (res == false)
                            {
                                /* The keyword keeps the UpdatedDate written before the crawl;
                                 * rolling back to the previous timestamp is intentionally not done. */
                                result = false;
                            }
                            else
                            {
                                keyWord.UpdatedDate = DateTime.Now;
                                _db.SaveChanges();
                            }
                        }
                    }
                }

                LogHelper.WriteLogs(sequence.ToString() + " " + key, "Num post: " + model.Pins.Count.ToString());
                NSLog.Logger.Info("ResponseCrawlData", result.ToString());
            }
            catch (Exception ex)
            {
                msg    = "Crawl data is unsuccessfully.";
                result = false;

                LogHelper.WriteLogs("ErrorCrawlData: " + Id, JsonConvert.SerializeObject(ex));
                NSLog.Logger.Error("ErrorCrawlData: " + Id, ex);
            }

            return(result);
        }