コード例 #1
0
        /// <summary>
        /// Creates an image-search task for a project and writes a matching operate-log entry.
        /// </summary>
        /// <param name="data">
        /// Task payload. <c>ProjectId</c> and <c>UsrId</c> are passed to the <c>ObjectId</c> string
        /// constructor, so they must be valid ObjectId strings; <c>Src</c> identifies the image.
        /// </param>
        /// <returns>
        /// A result with <c>IsSuccess = true</c> after both documents are inserted. If a non-deleted
        /// task with the same project and <c>Src</c> already exists, only <c>Message</c> is set and
        /// nothing is written.
        /// </returns>
        public ResultDto InsertImgSearchTask(IW2S_ImgSearchTaskDto data)
        {
            ResultDto result = new ResultDto();

            // Parse the project id once instead of re-parsing it for every use.
            var projectId = new ObjectId(data.ProjectId);

            var col     = MongoDBHelper.Instance.GetIW2S_ImgSearchTasks();
            var builder = Builders<IW2S_ImgSearchTask>.Filter;

            // A live (not soft-deleted) task for the same project and image counts as a duplicate.
            var duplicateFilter = builder.Eq(x => x.ProjectId, projectId)
                                  & builder.Eq(x => x.Src, data.Src)
                                  & builder.Eq(x => x.IsDel, false);
            if (col.Find(duplicateFilter).FirstOrDefault() != null)
            {
                result.Message = "已经上传成功了";
                return result;
            }

            // Parse the operator id BEFORE any write: a malformed UsrId must fail the whole call
            // rather than leave an inserted task without its operate-log entry.
            var operatorId = new ObjectId(data.UsrId);

            // The task is stored under the project's UsrId; FirstOrDefault yields the default
            // ObjectId when no project matches.
            var usrObjId = MongoDBHelper.Instance.GetIW2S_Projects()
                           .Find(Builders<IW2S_Project>.Filter.Eq(x => x._id, projectId))
                           .Project(x => x.UsrId)
                           .FirstOrDefault();

            IW2S_ImgSearchTask kw = new IW2S_ImgSearchTask
            {
                _id       = ObjectId.GenerateNewId(),
                // NOTE(review): the +8h offset presumably targets UTC+8 wall-clock time - confirm.
                CreatedAt = DateTime.Now.AddHours(8),
                ProjectId = projectId,
                UsrId     = usrObjId,
                BotStatus = 0,
                Src       = data.Src,
                IsDel     = false
            };

            col.InsertOne(kw);

            IW2S_OperateLog log = new IW2S_OperateLog
            {
                CreatedAt        = DateTime.Now.AddHours(8),
                ProjectId        = projectId,
                ShareOperateType = (int)ShareOperateType.AddKeyword,
                UserId           = operatorId,
                SiteSource       = (int)SiteSource.BaiduImg
            };

            MongoDBHelper.Instance.GetIW2S_OperateLogs().InsertOne(log);

            result.IsSuccess = true;
            return result;
        }
コード例 #2
0
        /// <summary>
        /// Saves image-search links in pages of 100, skipping links whose <c>BizId</c> is repeated
        /// within a page or already present in the links collection.
        /// </summary>
        /// <param name="links">Links to save; null or empty only logs a zero-count message.</param>
        /// <param name="tsk">Task the links belong to; its <c>Src</c> is used in the log output.</param>
        public void save_level1_links(List <IW2S_ImgSearchLink> links,
                                      IW2S_ImgSearchTask tsk)
        {
            if (links == null || links.Count == 0)
            {
                Console.WriteLine("SUCCESS saving 0 Level 1 Links for " + tsk.Src);
                return;
            }

            const int pagesize = 100;
            var col     = MongoDBHelper.Instance.GetIW2S_ImgSearchLinks();
            var builder = Builders<IW2S_ImgSearchLink>.Filter;

            for (int page = 0; page * pagesize < links.Count; page++)
            {
                var list = links.Skip(page * pagesize).Take(pagesize).ToList();
                list = ListDistinctBy(list, x => x.BizId);

                // Ask the collection which of this page's BizIds are already stored.
                List<ObjectId> bizIds = list.Select(x => x.BizId).ToList();
                var existingIds = new HashSet<ObjectId>(
                    col.Find(builder.In(x => x.BizId, bizIds)).Project(x => x.BizId).ToList());

                if (existingIds.Count > 0)
                {
                    list = list.Where(x => !existingIds.Contains(x.BizId)).ToList();
                }
                if (list == null || list.Count == 0)
                {
                    continue;
                }

                // Insert only the filtered page. Inserting the full input list here would bypass
                // the de-duplication above and re-insert every link once per page.
                col.InsertMany(list);
                Console.WriteLine("SUCCESS saving " + list.Count + " Level 1 Links for " + tsk.Src);
            }
        }
コード例 #3
0
        /// <summary>
        /// Fetches the Baidu image <c>pc_list</c> page for the task's <c>Src</c> through the proxy
        /// helper, extracts the <c>'sameList'</c> entries and passes each one to
        /// <c>HanleTagData</c> together with its 1-based rank.
        /// </summary>
        /// <param name="p">Task whose <c>Src</c> is searched and whose <c>_id</c> is used for the status reset.</param>
        /// <remarks>
        /// If the fetched page is null, the task's <c>BotStatus</c> is set to 0 and the method returns.
        /// Any exception is caught and only its message is written to the console.
        /// </remarks>
        private void query(IW2S_ImgSearchTask p)
        {
            try
            {
                string link = "http://image.baidu.com/n/pc_list?queryImageUrl={0}&pos=moresource#activeTab=1".FormatStr(p.Src);
                Console.WriteLine(link);

                var html = proxy.GetFastHtmlWithProxyIpAndARE(link, "utf-8").RemoveSpace();
                if (html == null)
                {
                    // Nothing was fetched: put the task's BotStatus back to 0 and stop.
                    var statusReset = new UpdateDocument
                    {
                        { "$set", new QueryDocument { { "BotStatus", 0 } } }
                    };
                    var taskSelector = new QueryDocument { { "_id", p._id } };
                    MongoDBHelper.Instance.GetIW2S_ImgSearchTasks().UpdateOne(taskSelector, statusReset);
                    return;
                }

                // Cut the array body found between 'sameList': and 'sameSizeNum': out of the page
                // and re-wrap it as a JSON array.
                var sameListPart = html.SubAfter("'sameList':").SubBefore("'sameSizeNum':");
                var arrayBody    = sameListPart.SubAfter("[{").SubBefore("}]");
                var images       = JsonToObject("[{" + arrayBody + "}]");

                int position = 0;
                foreach (var image in images)
                {
                    position++;
                    image.fromPageTitle = RemoveInivalidChar(image.fromPageTitle.GetTxtFromHtml2().RemoveSpace().GetLower());
                    image.textHost      = RemoveInivalidChar(image.textHost.GetTxtFromHtml2().RemoveSpace().GetLower());
                    HanleTagData(p, image.fromPageTitle, image.fromURL, image.textHost, image.fromURLHost, image.objURL, position);
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }
コード例 #4
0
        // Matches the first date of the form 20YY-M-D, 20YY/M/D or 20YY年M月D日.
        // Built once and shared instead of being re-created on every HanleTagData call.
        private static readonly Regex PublishDateRegex =
            new Regex("(20\\d{2}[-/]\\d{1,2}[-/]\\d{1,2})|(20\\d{2}年\\d{1,2}月\\d{1,2}日)");

        /// <summary>
        /// Fetches <paramref name="url"/> via <c>get_htmlUrl</c> and merges the outcome into the
        /// caller's state: a non-empty Item1 replaces <paramref name="realUrl"/>, a non-empty Item2
        /// replaces <paramref name="detailHtml"/>, and an empty <paramref name="domain"/> is filled
        /// from the current <paramref name="realUrl"/>.
        /// </summary>
        private void ResolveLinkTarget(string url, ref string realUrl, ref string detailHtml, ref string domain)
        {
            var fetched = get_htmlUrl(url);
            if (fetched != null)
            {
                if (!string.IsNullOrEmpty(fetched.Item1))
                {
                    realUrl = fetched.Item1;
                }
                if (!string.IsNullOrEmpty(fetched.Item2))
                {
                    detailHtml = fetched.Item2;
                }
            }
            if (!string.IsNullOrEmpty(realUrl) && string.IsNullOrEmpty(domain))
            {
                domain = GetDomain(realUrl);
            }
        }

        /// <summary>
        /// Builds an <c>IW2S_ImgSearchLink</c> for one search hit and saves it through
        /// <c>save_level1_links</c>. Nothing is saved when no page HTML could be fetched for
        /// <paramref name="href"/>.
        /// </summary>
        /// <param name="tsk">Owning task; supplies UsrId, ProjectId and the task id.</param>
        /// <param name="title">Stored as the link's Title.</param>
        /// <param name="href">Hit URL; fetched for its HTML and stored as LinkUrl.</param>
        /// <param name="abs">Stored as the link's Description.</param>
        /// <param name="domain">Hit domain; derived from the resolved URL when empty.</param>
        /// <param name="src">Stored as the link's Src.</param>
        /// <param name="rank">Stored as the link's Rank.</param>
        private void HanleTagData(IW2S_ImgSearchTask tsk, string title, string href, string abs, string domain, string src, int rank)
        {
            string realUrl = null, detailHtml = null;

            if (!string.IsNullOrWhiteSpace(href))
            {
                ResolveLinkTarget(href, ref realUrl, ref detailHtml, ref domain);
            }

            // Some pages only contain a script that clicks a "link" element; follow the first
            // href on such a page and resolve again.
            if (!string.IsNullOrEmpty(detailHtml) && detailHtml.Contains("document.getElementById(\"link\").click()"))
            {
                var gourl = detailHtml.GetFirstHref2();
                if (!string.IsNullOrEmpty(gourl))
                {
                    ResolveLinkTarget(gourl, ref realUrl, ref detailHtml, ref domain);
                }
            }

            if (string.IsNullOrEmpty(detailHtml))
            {
                return;
            }

            // First date-looking token in the page, or "" when there is none.
            Match m = PublishDateRegex.Match(detailHtml);
            string time = m.Success ? m.Value : "";

            // NOTE(review): the resolved URL is only used to derive the domain; LinkUrl and BizId
            // are built from the original href - confirm this is intended.
            IW2S_ImgSearchLink l1 = new IW2S_ImgSearchLink
            {
                UsrId     = tsk.UsrId,
                Domain    = domain,
                TopDomain = GetLevel1Domain(domain),
                Src       = src,
                LinkUrl   = href,

                BizId = "{0}{1}".FormatStr(href, tsk._id.ToString()).ToObjectId(),
                IW2S_ImgSearchTaskId = tsk._id,
                CreatedAt            = DateTime.UtcNow.AddHours(8),
                Description          = abs,
                Title = title,

                ProjectId   = tsk.ProjectId,
                PublishTime = time,

                Rank = rank
            };

            save_level1_links(new List <IW2S_ImgSearchLink> {
                l1
            }, tsk);
        }