/// <summary>
/// Extracts one article from an online page and hands it to the shared article list.
/// </summary>
/// <param name="indexUrl">URL of the article page to fetch.</param>
private static void GrabQDailyElement(string indexUrl)
{
    var article = GetQDailyInfoFromOnlineURL(indexUrl);
    if (article == null)
    {
        // Nothing could be extracted from this page; there is nothing to store.
        return;
    }

    qDailyList.AddToQDailyDic(article);
}
/// <summary>
/// Adds an article to the in-memory dictionary, keyed by its original URL, and then
/// writes the dictionary's current values to the JSON file.
/// </summary>
/// <param name="qDailyInfo">The article to add. A null value is ignored.</param>
/// <returns>
/// <c>true</c> when there was nothing to do (the argument is null or its URL is already
/// present); otherwise the result of <c>TryAdd</c> on the dictionary.
/// </returns>
public bool AddToQDailyDic(QDailyInfo qDailyInfo)
{
    if (qDailyInfo == null || _dicQDailyInfo.ContainsKey(qDailyInfo.OriginalUrl))
    {
        return true;
    }

    // Add before persisting: the file is built from the dictionary's values, so writing
    // first would leave the newly added article out of the saved snapshot.
    bool added = _dicQDailyInfo.TryAdd(qDailyInfo.OriginalUrl, qDailyInfo);
    if (added)
    {
        FileHelper.WriteToJsonFile(_dicQDailyInfo.Values.ToList(), _qDailyJsonFilePath);
        LogHelper.Info("Add QDaily Success!");
    }

    // NOTE(review): TryAdd can presumably still fail if another caller inserted the same
    // key between the ContainsKey check and here (assuming a concurrent dictionary);
    // in that case nothing is written or logged.
    return added;
}
/// <summary>
/// Downloads an article page and extracts its title, content, publish date and id.
/// </summary>
/// <param name="onlineURL">URL of the article page. Its last path segment, up to the first '.', is parsed as the article id.</param>
/// <param name="isContainIntro">Not used by this method; kept so existing callers continue to compile.</param>
/// <returns>
/// The populated <c>QDailyInfo</c>, or <c>null</c> when the page is empty, contains
/// "error404", has no article container, or any step throws.
/// </returns>
private static QDailyInfo GetQDailyInfoFromOnlineURL(string onlineURL, bool isContainIntro = false)
{
    try
    {
        var qDailyHTML = RequestHelper.HttpGet(onlineURL, Encoding.UTF8);
        if (string.IsNullOrEmpty(qDailyHTML) || qDailyHTML.Contains("error404"))
        {
            return null;
        }

        var qDailyDoc = htmlParser.Parse(qDailyHTML);
        var detail = qDailyDoc.GetElementsByClassName("com-article-detail").FirstOrDefault();
        if (detail == null)
        {
            // No article container on the page: treat it like the "not found" case above
            // instead of relying on a NullReferenceException being caught below.
            return null;
        }

        // The publish date is optional: a missing element or attribute leaves PubDate at
        // default(DateTime) rather than discarding the whole article.
        DateTime pubDate = default(DateTime);
        string updatetime = null;
        var dateElement = detail.QuerySelector("span.smart-date");
        if (dateElement != null)
        {
            var rawDate = dateElement.GetAttribute("data-origindate");
            if (rawDate != null)
            {
                updatetime = rawDate.ToString();
            }
        }
        if (!string.IsNullOrEmpty(updatetime))
        {
            // On parse failure TryParse resets pubDate to DateTime.MinValue, i.e. the default.
            DateTime.TryParse(updatetime, out pubDate);
        }

        // Title and content are required; if either element is missing, the resulting
        // exception is logged by the catch block and null is returned.
        var articleName = detail.QuerySelector("h2.title").InnerHtml;
        var articleContent = detail.QuerySelector("div.detail").InnerHtml;

        // Id = last URL segment without its extension; a non-numeric segment makes
        // Int32.Parse throw, which is handled by the catch block.
        var idText = onlineURL.Split('/').LastOrDefault().Split('.').FirstOrDefault();

        var qDailyInfo = new QDailyInfo()
        {
            ArticleName = articleName,
            OriginalUrl = onlineURL,
            QDailyId = Int32.Parse(idText),
            ArticleContent = articleContent,
            PubDate = pubDate,
        };
        return qDailyInfo;
    }
    catch (Exception ex)
    {
        // The message now names this method (it previously said "GetPositionInfoFromOnlineURL").
        // The misspelled "OnloneURL" key is kept as-is so existing log consumers are unaffected.
        LogHelper.Error("GetQDailyInfoFromOnlineURL Exception", ex, new { OnloneURL = onlineURL });
        return null;
    }
}