/// <summary>
/// Processes one crawled search response: decodes the payload, queues every
/// document that has not been seen before for insertion, increments the hit
/// counter of the matching cause-of-action record, and then enqueues either
/// the next page of the same query or a refined query.
/// </summary>
/// <param name="args">
/// Event data for the response. This method reads the response body (<c>Html</c>),
/// the request URL (<c>Url</c>) and the request's POST data (<c>urlInfo.PostData</c>).
/// </param>
public void DataReceive(DataReceivedEventArgs args)
{
    // Paging stops once "skip" reaches this value; the query is refined instead.
    const int maxSkip = 200;

    var html = WenShuAppHelper.GetWenShuDecode(args.Html.Replace("JSON=", "").Replace("\"", ""));
    // Wrap the decoded payload so that it can be parsed as a JObject.
    html = "{\"data\":" + html + "}";
    JObject jsonObj = JObject.Parse(html);

    // Recover the search condition that produced this response.
    var searchCondition = args.urlInfo.PostData;
    JObject searchJsonObj = JObject.Parse(searchCondition);
    var condition = searchJsonObj["condition"].ToString();
    var court = GetConditionParam(condition, "法院名称").Trim();
    var reason = GetConditionParam(condition, "案由").Trim();
    var caseType = GetConditionParam(condition, "案件类型").Trim();
    var fileType = GetConditionParam(condition, "文书类型").Trim();

    var data = jsonObj["data"];
    if (data == null)
    {
        return;
    }

    // Materialize the result set once; it is counted and iterated several times below.
    var items = data.ToList();
    var insert = 0;
    var update = 0;
    Console.WriteLine("获得数据:{0}", items.Count);

    foreach (var entInfo in items)
    {
        BsonDocument document = MongoDB.Bson.Serialization.BsonSerializer.Deserialize<BsonDocument>(entInfo.ToString());
        var guid = document.Text("文书ID");

        if (guidFilter.Contains(guid) || hasExistObj(guid))
        {
            // Already known: only counted, no write is queued.
            update++;
            continue;
        }

        document.Set("guid", guid);
        document.Set("cityName", cityName);
        document.Set("reason", reason);
        insert++;
        guidFilter.Add(guid);
        DBChangeQueue.Instance.EnQueue(new StorageData()
        {
            Document = document,
            Name = DataTableName,
            Type = StorageType.Insert
        });

        // Increment the hit counter of the cause-of-action record whose name matches.
        var hitReason = reasonList.FirstOrDefault(c => c.Text("name").Trim() == reason);
        if (hitReason != null)
        {
            var hitCount = hitReason.Int("count") + 1;
            hitReason.Set("count", hitCount);
            DBChangeQueue.Instance.EnQueue(new StorageData()
            {
                Document = new BsonDocument().Add("count", hitCount),
                Name = DataTableNameReason,
                Type = StorageType.Update,
                Query = Query.EQ("guid", hitReason.Text("guid"))
            });
        }
    }

    var skip = (int)searchJsonObj["skip"];
    var limit = (int)searchJsonObj["limit"];
    Console.WriteLine("获得{4}skip:{5}keyword:{6}{7}{8}|数据{3},添加:{0} 更新{1}剩余url:{2}", insert, update, UrlQueue.Instance.Count, items.Count, court.Replace("人民", "").Replace("法院", ""), skip, reason, caseType, fileType);

    if (items.Count >= pageSize && skip < maxSkip)
    {
        // A full page came back and the cap is not reached: record the current
        // offset for the court, then request the next page of the same query.
        DBChangeQueue.Instance.EnQueue(new StorageData()
        {
            Document = new BsonDocument().Add("currenNum", skip.ToString()),
            Name = DataTableNameCourt,
            Type = StorageType.Update,
            Query = Query.EQ("court", court)
        });
        skip = skip + pageSize;
        searchJsonObj["skip"] = skip.ToString();
        var postData = searchJsonObj.ToString();
        UrlQueue.Instance.EnQueue(new UrlInfo(args.Url) { Depth = 1, PostData = postData });
    }
    else if (skip >= maxSkip)
    {
        // Cap reached: try to narrow the query with further filter keywords
        // (case type, then document type).
        var isNewUrl = InitNextUrl(searchJsonObj);
        if (!isNewUrl)
        {
            // No narrower query was produced: store the condition so that it
            // can be refined later (original note: add a time filter).
            DBChangeQueue.Instance.EnQueue(new StorageData()
            {
                Document = new BsonDocument().Add("condition", args.urlInfo.PostData),
                Name = DataTableNameURL,
                Type = StorageType.Insert
            });
        }
    }
    // Otherwise a short page came back below the cap and nothing more is queued.
    // Marking the court as finished (isUpdate = "1") is currently disabled.
}
/// <summary>
/// Applies the crawler's base settings (threads, retries, request headers,
/// access token, URL filter) and seeds the URL queue with one search request
/// per (court, cause of action) pair for the configured city.
/// </summary>
public void SettingInit()
{
    // Plain concatenation: wrapping this in string.Format with no arguments
    // added nothing and would throw if the URL ever contained braces.
    string curUrl = materialUrl + "?cityName=" + HttpUtility.UrlEncode(cityName);

    // Original note: seed addresses need Bloom filtering.
    reqtoken = WenShuAppHelper.GetRequestToken();

    // Proxy mode: the list is left empty; loading proxies from the "IPProxy"
    // collection is currently disabled.
    Settings.IPProxyList = new List<IPProxy>();
    Settings.IgnoreSucceedUrlToDB = true;
    Settings.ThreadCount = 5;
    Settings.DBSaveCountLimit = 1;
    Settings.MaxReTryTimes = 10;
    Settings.IgnoreFailUrl = true;
    Settings.AutoSpeedLimit = true;
    Settings.AutoSpeedLimitMaxMSecond = 1000;
    Settings.AccessToken = reqtoken;
    // Original note: this crawler needs token replacement.
    Settings.CrawlerClassName = "WenShuAPPCrawler";
    Settings.ContentType = "application/json";
    Settings.UserAgent = "Dalvik/1.6.0 (Linux; U; Android 4.4.2; GT-I9300 Build/KOT49H)";
    Settings.PostEncoding = Encoding.UTF8;
    Settings.Referer = "wenshuapp.court.gov.cn";

    // Courts of the configured region that are not yet flagged isUpdate = "1",
    // ordered by descending "leval".
    var allCourtList = dataop.FindAllByQuery(DataTableNameCourt, Query.And(Query.NE("isUpdate", "1"), Query.Matches("region", cityName)))
        .SetFields("court", "leval")
        .OrderByDescending(c => c.Int("leval"))
        .ToList();
    reasonList = dataop.FindAllByQuery(DataTableNameReason, Query.EQ("isLeaf", "1")).ToList();

    // Seed one request per court and leaf cause of action. Additional filtering
    // by document type and case type is currently disabled.
    foreach (var court in allCourtList)
    {
        var courtCondition = GenConditionStr("法院名称", court.Text("court"));
        foreach (var reasonDoc in reasonList)
        {
            var reason = reasonDoc.Text("name");
            var keyWordCondition = GenConditionStr("案由", reason);
            var conditionList = new List<string> { courtCondition, keyWordCondition };
            var conditionStr = GenConditionStr(conditionList);
            var searchDoc = GenSearchStr(conditionStr);
            var postData = searchDoc.ToJson();
            UrlQueue.Instance.EnQueue(new UrlInfo(curUrl) { Depth = 1, PostData = postData });
        }
    }

    Console.WriteLine("正在加载账号数据");

    // Original note: do not crawl addresses. The only filter expression added
    // is this placeholder pattern — presumably it matches no real URL; verify.
    Settings.RegularFilterExpressions.Add(@"luckymnXXXXXXXXXXXXXXXXXX");

    if (!SimulateLogin())
    {
        Console.WriteLine("模拟登陆失败");
    }
}