Ejemplo n.º 1
0
        /// <summary>
        /// Handles one page of search results: decodes the response, queues unseen
        /// documents for insertion, increments the hit counter of the matching reason,
        /// and schedules the follow-up request (next page, or a refined search).
        /// </summary>
        /// <param name="args">
        /// Crawl result. <c>args.Html</c> is the encoded response body and
        /// <c>args.urlInfo.PostData</c> is the JSON search request that produced it.
        /// </param>
        public void DataReceive(DataReceivedEventArgs args)
        {
            // Remove every "JSON=" marker and every double quote before decoding the payload.
            var html = WenShuAppHelper.GetWenShuDecode(args.Html.Replace("JSON=", "").Replace("\"", ""));

            // Wrap the decoded payload in an object so that JObject can parse it.
            html = "{\"data\":" + html + "}";
            JObject jsonObj = JObject.Parse(html);

            // Recover the search conditions from the request that produced this page.
            var     searchCondition = args.urlInfo.PostData;
            JObject searchJsonObj   = JObject.Parse(searchCondition);
            var     conditon        = searchJsonObj["condition"].ToString();

            var court    = GetConditionParam(conditon, "法院名称").Trim();
            var reason   = GetConditionParam(conditon, "案由").Trim();
            var caseType = GetConditionParam(conditon, "案件类型").Trim();
            var fileType = GetConditionParam(conditon, "文书类型").Trim();

            var data = jsonObj["data"];
            if (data == null)
            {
                return;
            }

            // Materialise the result entries once; the count is reused for logging and paging.
            var entries = data.ToList();
            var insert  = 0;
            var update  = 0;
            Console.WriteLine("获得数据:{0}", entries.Count);

            foreach (var entInfo in entries)
            {
                BsonDocument document = MongoDB.Bson.Serialization.BsonSerializer.Deserialize<BsonDocument>(entInfo.ToString());
                var          guid     = document.Text("文书ID");

                if (guidFilter.Contains(guid) || hasExistObj(guid))
                {
                    // Already seen in this run or already stored: only counted, nothing is queued.
                    update++;
                    continue;
                }

                document.Set("guid", guid);
                document.Set("cityName", cityName);
                document.Set("reason", reason);
                insert++;
                guidFilter.Add(guid);
                DBChangeQueue.Instance.EnQueue(new StorageData()
                {
                    Document = document, Name = DataTableName, Type = StorageType.Insert
                });

                // Increment the hit counter of the reason this search was issued for.
                var hitReason = reasonList.FirstOrDefault(c => c.Text("name").Trim() == reason);
                if (hitReason != null)
                {
                    var hitCount = hitReason.Int("count") + 1;
                    hitReason.Set("count", hitCount);
                    DBChangeQueue.Instance.EnQueue(new StorageData()
                    {
                        Document = new BsonDocument().Add("count", hitCount), Name = DataTableNameReason, Type = StorageType.Update, Query = Query.EQ("guid", hitReason.Text("guid"))
                    });
                }
            }

            var skip  = (int)searchJsonObj["skip"];
            var limit = (int)searchJsonObj["limit"];
            Console.WriteLine("获得{4}skip:{5}keyword:{6}{7}{8}|数据{3},添加:{0} 更新{1}剩余url:{2}", insert, update, UrlQueue.Instance.Count, entries.Count, court.Replace("人民", "").Replace("法院", ""), skip, reason, caseType, fileType);

            // NOTE(review): 200 looks like the deepest offset the search will page to - confirm.
            if (entries.Count >= pageSize && skip < 200)
            {
                // Full page below the offset limit: record progress for the court, then request the next page.
                DBChangeQueue.Instance.EnQueue(new StorageData()
                {
                    Document = new BsonDocument().Add("currenNum", skip.ToString()), Name = DataTableNameCourt, Type = StorageType.Update, Query = Query.EQ("court", court)
                });
                skip = skip + pageSize;
                searchJsonObj["skip"] = skip.ToString();
                var postData = searchJsonObj.ToString();
                UrlQueue.Instance.EnQueue(new UrlInfo(args.Url)
                {
                    Depth = 1, PostData = postData
                });
            }
            else if (skip >= 200)
            {
                // Offset limit reached: try to refine the search (original note: add case type, then
                // document type, as extra filter keywords).
                var isNewUrl = InitNextUrl(searchJsonObj);
                if (!isNewUrl)
                {
                    // No refined request was produced: store the condition so it can be retried later
                    // (original note: with an additional time filter).
                    DBChangeQueue.Instance.EnQueue(new StorageData()
                    {
                        Document = new BsonDocument().Add("condition", args.urlInfo.PostData), Name = DataTableNameURL, Type = StorageType.Insert
                    });
                }
            }
            // Otherwise the last page for this condition was short: nothing further is queued.
            // (Marking the court as finished via "isUpdate" was disabled in the original code.)
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Configures the crawler settings and seeds the URL queue with one search
        /// request per (court, leaf reason) pair for <c>cityName</c>, then attempts
        /// the simulated login.
        /// </summary>
        public void SettingInit()
        {
            // Plain concatenation: the original wrapped this in string.Format with no arguments,
            // which adds nothing and throws FormatException if the URL ever contains '{' or '}'.
            string curUrl = materialUrl + "?cityName=" + HttpUtility.UrlEncode(cityName);

            // Original note: seed addresses need Bloom filtering.
            reqtoken = WenShuAppHelper.GetRequestToken();

            // Proxy mode: the list is created empty; loading proxies from the database is disabled.
            Settings.IPProxyList              = new List<IPProxy>();
            Settings.IgnoreSucceedUrlToDB     = true;
            Settings.ThreadCount              = 5;
            Settings.DBSaveCountLimit         = 1;
            Settings.MaxReTryTimes            = 10;
            Settings.IgnoreFailUrl            = true;
            Settings.AutoSpeedLimit           = true;
            Settings.AutoSpeedLimitMaxMSecond = 1000;
            Settings.AccessToken              = reqtoken;
            Settings.CrawlerClassName         = "WenShuAPPCrawler"; // original note: token replacement is required
            Settings.ContentType              = "application/json";
            Settings.UserAgent                = "Dalvik/1.6.0 (Linux; U; Android 4.4.2; GT-I9300 Build/KOT49H)";
            Settings.PostEncoding             = Encoding.UTF8;
            Settings.Referer                  = "wenshuapp.court.gov.cn";

            // Courts in the target region not yet flagged isUpdate == "1", ordered by "leval" descending.
            var allCourtList = dataop.FindAllByQuery(DataTableNameCourt, Query.And(Query.NE("isUpdate", "1"), Query.Matches("region", cityName))).SetFields("court", "leval").OrderByDescending(c => c.Int("leval")).ToList();

            reasonList = dataop.FindAllByQuery(DataTableNameReason, Query.EQ("isLeaf", "1")).ToList();

            // One seed request per court and leaf reason. Splitting further by document type and
            // case type was disabled in the original code.
            foreach (var court in allCourtList)
            {
                var courtCondition = GenConditionStr("法院名称", court.Text("court"));
                foreach (var reasonDoc in reasonList)
                {
                    var reason           = reasonDoc.Text("name");
                    var keyWordCondition = GenConditionStr("案由", reason);
                    var conditionList    = new List<string> { courtCondition, keyWordCondition };
                    var conditionStr     = GenConditionStr(conditionList);
                    var searchDoc        = GenSearchStr(conditionStr);
                    var postData         = searchDoc.ToJson();
                    UrlQueue.Instance.EnQueue(new UrlInfo(curUrl)
                    {
                        Depth = 1, PostData = postData
                    });
                }
            }

            Console.WriteLine("正在加载账号数据");

            // Original note: do not crawl discovered addresses.
            // NOTE(review): this pattern looks like a placeholder that is not expected to match real URLs - confirm.
            Settings.RegularFilterExpressions.Add(@"luckymnXXXXXXXXXXXXXXXXXX");

            if (!SimulateLogin())
            {
                Console.WriteLine("模拟登陆失败");
            }
        }