/// <summary>
/// Builds the Baidu search URL for the keyword carried by <paramref name="tsk"/>.
/// </summary>
/// <param name="tsk">Task whose <c>Keyword</c> is used as the search term.</param>
/// <returns>
/// The search URL with the keyword URL-encoded as utf-8, or <see cref="string.Empty"/>
/// when the keyword is null or empty after normalization.
/// </returns>
string get_urls(IW2S_BaiduKeyword tsk)
{
    // RemoveSpace/GetLower are project helpers; presumably they strip spaces and
    // lower-case the text -- confirm against their definitions.
    string normalized = tsk.Keyword.RemoveSpace().GetLower();
    if (string.IsNullOrEmpty(normalized))
    {
        return string.Empty;
    }

    string encoded = normalized.GetUrlEncodedString("utf-8");
    return "http://www.baidu.com/s?ie=utf-8&wd={0}".FormatStr(encoded);
}
/// <summary>
/// Entry point for one keyword task: builds the search URL and, when one could be
/// built, processes the page it points to.
/// </summary>
/// <param name="tsk">Keyword task to run.</param>
public void Query(IW2S_BaiduKeyword tsk)
{
    string link = get_urls(tsk);

    // get_urls returns an empty string when there is no usable keyword; nothing to fetch then.
    if (!string.IsNullOrEmpty(link))
    {
        GetLinks(link, tsk, 0);
    }
}
/// <summary>
/// Fetches the page at <paramref name="link"/>, extracts the anchors found between the
/// "相关搜索" heading and the pager element, and saves each anchor whose text contains
/// (but is not equal to) the task keyword as an <c>IW2S_BaiduCommend</c>.
/// </summary>
/// <param name="link">URL of the page to fetch; nothing is done when it is null or empty.</param>
/// <param name="tsk">Keyword task; its ids and keyword are copied onto every saved record.</param>
/// <param name="height">
/// Recursion depth. Currently unused because following suggestion links is disabled;
/// kept so existing callers keep compiling.
/// </param>
void GetLinks(string link, IW2S_BaiduKeyword tsk, int height)
{
    // Normalize the keyword with the same helpers that are applied to each title below.
    // The previous code compared titles against tsk.Keyword.ToLower(), which was not
    // normalized with RemoveSpace, so a keyword containing spaces could never match.
    string searchKeyword = tsk.Keyword.GetLower().RemoveSpace();

    if (string.IsNullOrEmpty(link))
    {
        return;
    }

    // An empty keyword would make title.Contains(...) true for every anchor on the page.
    if (string.IsNullOrEmpty(searchKeyword))
    {
        return;
    }

    log(link);
    var html = get_html(link);
    if (html == null)
    {
        return;
    }

    var tags = html.SubAfter("相关搜索</div>").SubBefore("id=\"page\"").SplitWith("<a");
    if (tags == null || tags.Length == 0)
    {
        // No anchors in the expected section; the original code reports this as a block.
        log("BLOCKED " + tsk.Keyword);
        return;
    }

    foreach (var a in tags)
    {
        string title = a.GetTxtFromHtml2().RemoveSpace().GetLower();
        string href = a.GetFirstHref2();

        if (string.IsNullOrEmpty(title) || string.IsNullOrWhiteSpace(href))
        {
            continue;
        }

        // Keep only suggestions that extend the keyword, not the keyword itself.
        if (title == searchKeyword || !title.Contains(searchKeyword))
        {
            continue;
        }

        IW2S_BaiduCommend baiduCommend = new IW2S_BaiduCommend
        {
            // Id is derived from task id + title, so the same suggestion maps to the same id.
            _id = "{0}{1}".FormatStr(tsk._id, title).ToObjectId(),
            CommendKeyword = title,
            // UTC shifted by +8 hours -- presumably China Standard Time; confirm.
            CreatedAt = DateTime.UtcNow.AddHours(8),
            Keyword = tsk.Keyword,
            KeywordId = tsk._id,
            UsrId = tsk.UsrId,
            BotIntervalHours = 7 * 24, // one week, expressed in hours
            ProjectId = tsk.ProjectId
        };
        saveBaiduKeyword(baiduCommend);

        // Following each suggestion's href one level deeper (height < 1) is currently
        // disabled; the recursive call had been commented out.
    }
}
/// <summary>
/// Runs the Baidu keyword query for a single task. Any exception is logged and
/// swallowed, so a failing task does not propagate to the caller.
/// </summary>
/// <param name="p">Keyword task to process.</param>
private void query(IW2S_BaiduKeyword p)
{
    // The former unused filter-builder local and the redundant outer try/catch were
    // removed; a single handler keeps the same best-effort behaviour.
    try
    {
        BaiduKeywordQuery baidu = new BaiduKeywordQuery(p.Keyword);
        baidu.Query(p);
    }
    catch (Exception ex)
    {
        log(ex.Message);
    }
}