/// <summary>
/// Creates the view model: wires the UI commands, reads the "wait for print result"
/// timeout from <c>ClientConfig\SystemConfig.xml</c> and registers the timeout timer.
/// </summary>
/// <remarks>
/// Nothing is initialised when the instance is created in design mode.
/// The timeout keeps its default of 1 when the <c>WaitPrintResultTime</c> app setting is
/// missing, or when the configured value is empty or not an integer.
/// </remarks>
public QueryDataViewModel()
{
    if (this.IsInDesignMode)
    {
        return;
    }

    SelectMeasureCommand = new ActionCommand(SelectMethod);
    QueryCommand = new ActionCommand(QueryWeightRecords);
    SupplementCommand = new ActionCommand(SupplementMethod);
    ShowPictureCommand = new ActionCommand(ShowPictureMethod);
    ShowVideoCommand = new ActionCommand(ShowVideoMethod);

    // Resolve the configuration file path: <base directory>\ClientConfig\SystemConfig.xml.
    string basePath = System.IO.Path.Combine(System.AppDomain.CurrentDomain.BaseDirectory, "ClientConfig");
    string configPath = System.IO.Path.Combine(basePath, "SystemConfig.xml");

    // Read the timeout used while waiting for the print result.
    // The app setting holds the lookup expression handed to XpathHelper.GetValue.
    int waitPrintResultTime = 1;
    string waitPrintResultTimeItem = ConfigurationManager.AppSettings["WaitPrintResultTime"];
    if (!string.IsNullOrEmpty(waitPrintResultTimeItem))
    {
        string configuredValue = XpathHelper.GetValue(configPath, waitPrintResultTimeItem);

        // TryParse instead of Convert.ToInt32: a malformed value must not abort construction.
        int parsedValue;
        if (int.TryParse(configuredValue, out parsedValue))
        {
            waitPrintResultTime = parsedValue;
        }
    }

    // Register the timer that fires when waiting for the print result times out.
    waitPrintReusltTimer = new Calculagraph("");
    waitPrintReusltTimer.Timeout = waitPrintResultTime;
    waitPrintReusltTimer.TimeOver += new TimeoutCaller(waitPrintReusltTimer_TimeOver);
}
/// <summary>
/// Turns relative image addresses inside an HTML fragment into full URLs by putting
/// <paramref name="prexUrl"/> in front of them.
/// </summary>
/// <param name="contents">HTML fragment whose <c>img</c> <c>src</c> values are inspected.</param>
/// <param name="prexUrl">Prefix prepended to every <c>src</c> that does not contain "http".</param>
/// <returns>
/// The rewritten fragment. The input is returned unchanged when it is null/empty, has no
/// images, or when extraction fails (this method is best effort and never throws).
/// </returns>
public static string DealImgUrlPrex(string contents, string prexUrl)
{
    var str = contents;
    if (string.IsNullOrEmpty(str))
    {
        return str;
    }

    try
    {
        var imgList = XpathHelper.GetAttrValueListByXPath(str, "//img", "src");
        if (imgList == null || imgList.Count == 0)
        {
            return str;
        }

        // Each distinct src is rewritten once. Replace() is global, so handling a repeated
        // src again would prefix the already-prefixed URL a second time.
        var handled = new System.Collections.Generic.HashSet<string>();
        foreach (var img in imgList)
        {
            // Empty values cannot be replaced; anything containing "http" is treated as absolute.
            if (string.IsNullOrEmpty(img) || img.Contains("http") || !handled.Add(img))
            {
                continue;
            }

            var newUrl = prexUrl + img.Replace("./", "/");
            str = str.Replace(img, newUrl);
        }

        // NOTE(review): a src that is a substring of another src (e.g. "/a.png" and
        // "/img/a.png") is still affected by the global Replace - confirm whether that occurs.
    }
    catch (Exception)
    {
        // Deliberate best effort: on failure the content processed so far is returned.
    }

    return str;
}
/// <summary>
/// Checks that <c>XpathHelper.GetStringByString</c> yields "Hello world" when the query
/// <c>/a/text()</c> is applied to <c>&lt;a&gt;Hello world&lt;/a&gt;</c>.
/// </summary>
public void GetStringByString()
{
    // Arrange
    const string xml = "<a>Hello world</a>";
    const string textQuery = "/a/text()";

    // Act
    string actual = XpathHelper.GetStringByString(xml, textQuery);

    // Assert
    Assert.Equal("Hello world", actual);
}
/// <summary>
/// Handles the window's Loaded event: resolves the client code, registers the tray icon
/// with an exit menu, reads the photo configuration, initialises the FTP manager, starts
/// the one-minute upload timer and finally minimises and hides the window.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void Window_Loaded(object sender, RoutedEventArgs e)
{
    // Weighing-point (client) code: the field is passed in as the lookup argument and is
    // then overwritten with the value read from the system configuration file.
    _curClientCode = XpathHelper.GetValue(_sysConfigFile, _curClientCode);

    // Tray icon; balloon tip and hover text are both set to empty strings.
    this.notifyIcon = new NotifyIcon();
    var tray = this.notifyIcon;
    tray.BalloonTipText = "";
    tray.Text = "";
    tray.Icon = Properties.Resources.FileSync;
    tray.Visible = true;

    // Context menu with a single "exit" entry.
    var quitItem = new System.Windows.Forms.MenuItem("退出");
    quitItem.Click += new EventHandler(Quit);
    tray.ContextMenu = new System.Windows.Forms.ContextMenu(new[] { quitItem });
    log.Info("注册托盘图标完成");

    // Photo configuration. The ConfigReader instance itself is not used afterwards;
    // presumably its constructor prepares state for the static ReadPhotoConfig - verify.
    ConfigReader configReader = new ConfigReader(_sysConfigFile);
    _curPhotoConfig = ConfigReader.ReadPhotoConfig();
    log.Info("读取配置文件完成");

    // File server access, using the FTP settings from the photo configuration.
    _curFtpServices = new FtpManager();
    _curFtpServices.FtpUpDown(_curPhotoConfig.FtpIp, _curPhotoConfig.FtpUserName, _curPhotoConfig.FtpPassWord);
    log.Info("初始化文件服务器完成");

    // Upload check runs once per minute.
    curUploadTimer = new DispatcherTimer();
    curUploadTimer.Interval = TimeSpan.FromMinutes(1);
    curUploadTimer.Tick += curUploadJpgTimer_Tick;
    curUploadTimer.Start();
    ShowUpLoadInfo("开始文件同步");

    // Remember the minimised state, then hide the window.
    WindowState = System.Windows.WindowState.Minimized;
    wsl = WindowState;
    this.Hide();
}
/// <summary>The URL this browser wrapper is asked to execute.</summary>
private string URL;

/// <summary>
/// General-purpose constructor: creates the embedded Gecko browser docked inside
/// <paramref name="control"/>, stores the collaborators and then calls <c>init()</c>.
/// </summary>
/// <param name="control">Host control that becomes the browser's parent.</param>
/// <param name="dgv">Grid stored on this instance.</param>
/// <param name="taskInfoEntity">Task description stored on this instance.</param>
/// <param name="geckofxType">Browsing type stored on this instance.</param>
/// <param name="URL">URL to execute; stored in the field of the same name.</param>
public GeckofxWebbrower(Control control, DataGridView dgv, TaskInfoEntity taskInfoEntity, GeckofxWebbrowerType geckofxType, string URL)
{
    // Embedded browser fills the host control.
    geckoWebBrowser = new GeckoWebBrowser
    {
        Parent = control,
        Dock = DockStyle.Fill
    };

    // Collaborators supplied by the caller.
    this.taskInfoEntity = taskInfoEntity;
    this.control = control;
    this.dgv = dgv;
    this.geckofxType = geckofxType;

    // Helpers owned by this instance.
    ruleStyle = new RuleStyle();
    xpathHelper = new XpathHelper();

    this.URL = URL;

    // Runs last so every field above is populated first.
    init();
}
/// <summary>
/// Downloads one news detail page (mobile user agent, UTF-8) and extracts body,
/// publish time, source and author into a <c>DtoNews</c>.
/// </summary>
/// <param name="newsUrl">Address of the detail page.</param>
/// <returns>
/// The populated news object (its title is left empty here), or <c>null</c> when any
/// step throws; the failure is written to the error log.
/// </returns>
public DtoNews NewsGathering(string newsUrl)
{
    try
    {
        var pageHtml = HttpHelper.GetContentByMobileAgent(newsUrl, Encoding.UTF8);

        var body = XpathHelper.GetInnerHtmlByXPath(pageHtml, "//div[@id='artical_real']", "").Trim();

        var publishedRaw = XpathHelper.GetInnerHtmlByXPath(pageHtml, "//div[@id='artical_sth']/p/span[1]", "");
        var published = StrHelper.FormatHtml(publishedRaw).Trim();

        var sourceRaw = XpathHelper.GetInnerHtmlByXPath(pageHtml, "//div[@id='artical_sth']/p/span[3]/span", "");
        var source = StrHelper.FormatHtml(sourceRaw).Trim();

        var writerRaw = XpathHelper.GetInnerHtmlByXPath(pageHtml, "//div[@id='artical_sth']/p/span[4]/span", "");
        var writer = StrHelper.FormatHtml(writerRaw).Trim();

        // Fall back to the site name when the page states no source.
        if (string.IsNullOrWhiteSpace(source))
        {
            source = "凤凰佛教";
        }

        body = DealContent(body);

        return new DtoNews
        {
            Contents = body,
            Title = "",
            PubTime = StrHelper.ToDateTime(published),
            FromUrl = newsUrl,
            FromSiteName = source,
            Author = writer,
            CreateTime = DateTime.Now,
            IsShow = 0,
        };
    }
    catch (Exception ex)
    {
        Log.Error(newsUrl + " 错误:" + ex.Message + ex.StackTrace);
        return null;
    }
}
/// <summary>
/// Fills in <c>FtpPhoto</c> on every picture with an <c>ftp://user:password@host/photo</c>
/// address. Despite the name, nothing is downloaded here.
/// </summary>
/// <param name="list">
/// Pictures to update. The first entry's <c>equcode</c> selects the client configuration
/// file the FTP settings are read from. Null or empty lists are ignored.
/// </param>
/// <remarks>
/// Any failure is swallowed; pictures not yet processed keep their previous
/// <c>FtpPhoto</c> value.
/// NOTE(review): the FTP credentials end up in plain text inside the URL and are not
/// escaped - confirm this is acceptable for the consumers of <c>FtpPhoto</c>.
/// </remarks>
public void DownloadImags(List<PictureModel> list)
{
    if (list == null || list.Count == 0)
    {
        return;
    }

    try
    {
        // Instance is not used below; kept because the constructor's effects are not visible here.
        FtpManager ftpManager = new FtpManager();

        // <base directory>\CarMeasureClient\<equcode>.xml (production location).
        string configDirectory = System.IO.Path.Combine(System.AppDomain.CurrentDomain.BaseDirectory, "CarMeasureClient");
        string clientConfigPath = System.IO.Path.Combine(configDirectory, list.First().equcode + ".xml");

        // Each app setting holds the lookup expression for one FTP value in the client config.
        string host = XpathHelper.GetValue(clientConfigPath, ConfigurationManager.AppSettings["FtpIp"].ToString());
        string user = XpathHelper.GetValue(clientConfigPath, ConfigurationManager.AppSettings["FtpPicUserName"].ToString());
        string password = XpathHelper.GetValue(clientConfigPath, ConfigurationManager.AppSettings["FtpPicPassWord"].ToString());

        // Everything before the photo path is identical for all pictures.
        string addressPrefix = "ftp://" + user + ":" + password + "@" + host + "/";
        foreach (var picture in list)
        {
            picture.FtpPhoto = addressPrefix + picture.photo;
        }
    }
    catch
    {
        // Best effort: failures are intentionally ignored.
    }
}
/// <summary>
/// Initialises the seat information: reads seat id and seat name from the system
/// configuration file and stores them as the login user's role.
/// </summary>
/// <remarks>
/// The app settings <c>SysConfigFileName</c>, <c>SeatId</c> and <c>SeatName</c> must
/// exist; a missing one results in a <see cref="System.NullReferenceException"/>.
/// </remarks>
private void InitSeatInfo()
{
    var settings = ConfigurationManager.AppSettings;

    // Config file location = application base directory + configured file name.
    string configFileName = settings["SysConfigFileName"].ToString();
    string configFile = System.AppDomain.CurrentDomain.BaseDirectory + configFileName;

    // Seat id.
    string seatIdKey = settings["SeatId"].ToString();
    var seatCode = XpathHelper.GetValue(configFile, seatIdKey);

    // Seat name.
    string seatNameKey = settings["SeatName"].ToString();
    var seatTitle = XpathHelper.GetValue(configFile, seatNameKey);

    var role = new Role();
    role.Code = seatCode;
    role.Name = seatTitle;
    LoginUser.Role = role;
}
/// <summary>
/// Extracts the <c>src</c> of every <c>img</c> element in an HTML fragment and wraps each
/// one in a <c>DtoNewsMedia</c>.
/// </summary>
/// <param name="contents">HTML fragment to scan.</param>
/// <returns>
/// One entry per image in document order: <c>Orders</c> is the zero-based position,
/// the three URL properties all carry the <c>src</c> value, <c>Description</c> is empty,
/// <c>IsShow</c> is 1 and <c>NewsId</c> is 0. Never null; the list is empty when no
/// image is found and contains what was collected so far when extraction fails.
/// </returns>
public static List<DtoNewsMedia> GetImgList(string contents)
{
    var list = new List<DtoNewsMedia>();
    try
    {
        var imgList = XpathHelper.GetAttrValueListByXPath(contents, "//img", "src");
        if (imgList == null)
        {
            return list;
        }

        var order = 0;
        foreach (var img in imgList)
        {
            list.Add(new DtoNewsMedia
            {
                Description = "",
                IsShow = 1,
                NewsId = 0,
                Orders = order,
                PicOriginalUrl = img,
                PicUrl = img,
                ThumbnailUrl = img
            });
            order++;
        }
    }
    catch (Exception)
    {
        // Deliberate best effort: callers receive whatever was collected before the failure.
    }

    return list;
}
/// <summary>
/// Searches Baidu for Baijiahao articles matching the keywords, opens every hit,
/// extracts the author name and app id from the article page and stores authors that
/// are not yet known.
/// </summary>
/// <param name="keywords">Search keywords; a few marker words are stripped and spaces become %20.</param>
/// <param name="newsType">Only forwarded to the follow-up search call.</param>
/// <param name="searchPageIndex">Zero-based result page; values above 0 add "&amp;pn=index*10" to the URL.</param>
/// <returns>Always 0.</returns>
public int GatheringAuthorUrlFromSearch2(string keywords, int newsType, int searchPageIndex)
{
    if (string.IsNullOrWhiteSpace(keywords))
    {
        return(0);
    }
    // Counter of Baijiahao addresses on the current result page (original note: stop
    // paging when a page has fewer than 2 of them).
    // NOTE(review): this counter is only ever assigned 0 in this method, so the paging
    // branch at the bottom (iBjhCount >= 3) cannot be reached as written - confirm intent.
    var iBjhCount = 0;
    // Counter of valid (newly stored) Baijiahao authors.
    var iHaveValidBjh = 0;
    // Counter of loop iterations without a Baijiahao hit.
    // NOTE(review): never read or updated below.
    var iContinueNo = 0;
    var strContent = "";
    // Marker words removed from the keywords: 贡献文章 / 总阅读数 / 作者文章 / 按时间.
    // (A disabled variant wrapped these words in quotes instead of removing them.)
    keywords = keywords.Replace("贡献文章 ", "");
    keywords = keywords.Replace("贡献文章", "");
    keywords = keywords.Replace("总阅读数 ", "");
    keywords = keywords.Replace("总阅读数", "");
    keywords = keywords.Replace("作者文章 ", "");
    keywords = keywords.Replace("作者文章", "");
    keywords = keywords.Replace("按时间", "");
    // Records the search keywords on the stored author (GroupId).
    // NOTE(review): values longer than 50 characters are cut to 30, not 50 - confirm.
    var groupid = keywords;
    if (groupid.Length > 50)
    {
        groupid = groupid.Substring(0, 30);
    }
    // Only spaces are encoded; full URL encoding (HttpUtility.UrlEncode) is disabled.
    keywords = keywords.Replace(" ", "%20");
    // Restricts results to baijiahao.baidu.com pages carrying the authorisation phrase.
    // (A disabled variant used a "site:" filter instead.)
    var inurl = "inurl%3Abaijiahao.baidu.com%20\"本文系作者授权百家号发表\"";
    var url = "https://www.baidu.com/s?wd=" + keywords + inurl;
    try
    {
        if (searchPageIndex > 0)
        {
            url += "&pn=" + searchPageIndex * 10;
        }
        Log.Info(url + " 搜索 页码" + searchPageIndex);
        #region === 取内容 ===
        // Fetch the result page: one plain request, then up to two retries through
        // GetContentByAgent, each preceded by a one-second pause.
        strContent = HttpHelper.GetContent(url, Encoding.UTF8);
        if (string.IsNullOrWhiteSpace(strContent))
        {
            Thread.Sleep(1 * 1000);
            // Request again: requests often fail once the proxy is in use.
            strContent = HttpHelper.GetContentByAgent(url, Encoding.UTF8);
            if (string.IsNullOrWhiteSpace(strContent))
            {
                // Request once more, for the same reason (toggling the proxy is disabled).
                Thread.Sleep(1 * 1000);
                strContent = HttpHelper.GetContentByAgent(url, Encoding.UTF8);
                if (string.IsNullOrWhiteSpace(strContent))
                {
                    Log.Info(url + " 未抓取到任何内容 页码" + searchPageIndex);
                }
            }
        }
        #endregion
        // (Disabled) debug dump of the raw search response.
        #region === deal baijiahao ===
        if (!string.IsNullOrWhiteSpace(strContent))
        {
            // First link of every result footer block.
            var lista = XpathHelper.GetOuterHtmlListByXPath(strContent, "//div[@class='f13']/a[1]");
            if (lista != null && lista.Count > 0)
            {
                iBjhCount = 0;
                iHaveValidBjh = 0;
                foreach (var a in lista)
                {
                    var href = XpathHelper.GetAttrValueByXPath(a, "//a", "href");
                    #region === deal baijiahao news url ===
                    // Pause one second per hit, then load the article page
                    // (agent request first, plain request as fallback).
                    Thread.Sleep(1 * 1000);
                    var str = HttpHelper.GetContentByAgent(href, Encoding.UTF8);
                    if (string.IsNullOrWhiteSpace(str))
                    {
                        str = HttpHelper.GetContent(href, Encoding.UTF8);
                    }
                    // Read the Baijiahao account name and app id from the page.
                    var author = "";
                    var appId = "";
                    if (!string.IsNullOrWhiteSpace(str))
                    {
                        try
                        {
                            author = XpathHelper.GetInnerHtmlByXPath(str, "//div[@class='author-detail']/a/p", "").Replace("-百家号", "");
                            appId = XpathHelper.GetAttrValueByXPath(str, "//div[@class='author-detail']/a", "href");
                            // Expected href shape: u?app_id=1546166210605725&fr=bjhvideo&wfr=spider
                            if (!string.IsNullOrWhiteSpace(appId))
                            {
                                // Takes the text between the first and second '=' and drops "&fr".
                                // NOTE(review): if the href contains no '=', str2[1] throws, the
                                // empty catch below swallows it and appId keeps the raw href.
                                var str2 = appId.Split('=');
                                appId = str2[1].Replace("&fr", "");
                            }
                            else
                            {
                                // Fallback: look for "app_id": in the raw page text and cut out the
                                // following 19 characters, then strip quotes, commas and "type".
                                var iIndex = str.IndexOf("\"app_id\":");
                                if (iIndex > 0)
                                {
                                    appId = str.Substring(iIndex + 10, 19).Replace("\",\"type", "").Replace("\"", "").Replace(",", "").Replace("type", "");
                                }
                                else
                                {
                                    #region === 重新取内容处理 ===
                                    // Second attempt: reload the page (plain request first this
                                    // time) and repeat the same extraction.
                                    Thread.Sleep(1 * 1000);
                                    str = HttpHelper.GetContent(href, Encoding.UTF8);
                                    if (string.IsNullOrWhiteSpace(str))
                                    {
                                        str = HttpHelper.GetContentByAgent(href, Encoding.UTF8);
                                    }
                                    if (!string.IsNullOrWhiteSpace(str))
                                    {
                                        try
                                        {
                                            author = XpathHelper.GetInnerHtmlByXPath(str, "//div[@class='author-detail']/a/p", "").Replace("-百家号", "");
                                            appId = XpathHelper.GetAttrValueByXPath(str, "//div[@class='author-detail']/a", "href");
                                            // Expected href shape: u?app_id=1546166210605725&fr=bjhvideo&wfr=spider
                                            if (!string.IsNullOrWhiteSpace(appId))
                                            {
                                                var str2 = appId.Split('=');
                                                appId = str2[1].Replace("&fr", "");
                                            }
                                            else
                                            {
                                                iIndex = str.IndexOf("\"app_id\":");
                                                if (iIndex > 0)
                                                {
                                                    appId = str.Substring(iIndex + 10, 19).Replace("\",\"type", "").Replace("\"", "").Replace(",", "").Replace("type", "");
                                                }
                                                else
                                                {
                                                }
                                            }
                                        }
                                        catch
                                        {
                                            // Extraction errors on the retry are ignored.
                                        }
                                    }
                                    #endregion
                                }
                            }
                        }
                        catch (Exception ex)
                        {
                            // Extraction errors are ignored; appId keeps whatever it held.
                        }
                    }
                    else
                    {
                        Log.Info("取百家号主页内容没取到 href=" + href);
                    }
                    if (string.IsNullOrWhiteSpace(appId))
                    {
                        // No app id found: log and move on to the next hit.
                        Log.Info("appid没取到 内容如下=== begin === href=" + href);
                        // (Disabled) dump of the page content.
                        Log.Info("appid没取到 内容如下=== end === href" + href);
                        continue;
                    }
                    #region === 判断是否已存在 ===
                    // Store the author only when the app id is not known yet.
                    var isHave = DalNews.IsExistsAuthor_Bjh(appId);
                    if (!isHave)
                    {
                        iHaveValidBjh++;
                        var model = new DtoAuthor()
                        {
                            Author = author,
                            AuthorId = appId,
                            GroupId = groupid,
                            IntervalMinutes = 60,
                            IsDeal = 0,
                            IsShow = 0,
                            LastDealTime = DateTime.Now,
                            RefreshTimes = 0,
                            Url = "http://baijiahao.baidu.com/u?app_id=" + appId,
                        };
                        var id = DalNews.Insert_Author_Bjh(model);
                        Log.Info("keyword" + keywords + "authodid=" + id);
                    }
                    else
                    {
                        Log.Info("appid" + appId + "已存在");
                    }
                    #endregion
                    #endregion
                }
            }
        }
        else
        {
            Log.Error("url=" + url + " 无内容" + DateTime.Now);
        }
        #endregion
        // Go to the next result page when the current one had at least 3 Baijiahao
        // hits, otherwise stop.
        if (iBjhCount >= 3)
        {
            // Stop once deep into the results (page > 30) with no new author found.
            if (iHaveValidBjh < 1 && searchPageIndex > 30)
            {
                return(0);
            }
            searchPageIndex++;
            // NOTE(review): this calls GatheringAuthorUrlFromSearch (no "2" suffix) and
            // passes keywords whose spaces are already replaced by %20 - confirm.
            GatheringAuthorUrlFromSearch(keywords, newsType, searchPageIndex);
        }
    }
    catch (Exception ex)
    {
        Log.Error("url=" + url + " " + DateTime.Now);
        Log.Error(ex.Message + ex.StackTrace);
    }
    return(0);
}
/// <summary>
/// Opens a tab reached through the UBS logins button and returns its page object.
/// </summary>
/// <typeparam name="T">Type handed to <c>OpenTab</c>; must have a parameterless constructor.</typeparam>
/// <param name="tabItemNames">
/// Text parts passed to <c>XpathHelper.CombineFewTextPartsInOneWithOr</c> to build the
/// XPath predicate that selects the tab item.
/// </param>
/// <returns>The result of <c>OpenTab</c> for the located tab item.</returns>
private T OpenUbsLoginsTab<T>(params string[] tabItemNames) where T : new()
{
    // The tab item is any descendant of the <ul> that follows the logins button.
    var buttonLocator = _ubsLoginsButton.Selector.GetLocator();
    var textPredicate = XpathHelper.CombineFewTextPartsInOneWithOr(tabItemNames);
    var tabItemXPath = $"{buttonLocator}//following-sibling::ul//*[{textPredicate}]";

    var tabItem = new UbsWebElement(By.XPath(tabItemXPath));
    return OpenTab<T>(_ubsLoginsButton, tabItem);
}
/// <summary>
/// Opens a tab reached through the given main-menu element and returns its page object.
/// </summary>
/// <typeparam name="T">Type handed to <c>OpenTab</c>; must have a parameterless constructor.</typeparam>
/// <param name="ubsMainMenuWebElement">Main-menu element the tab item is located relative to.</param>
/// <param name="tabItemNames">
/// Text parts passed to <c>XpathHelper.CombineFewTextPartsInOneWithOr</c> to build the
/// XPath predicate that selects the tab link.
/// </param>
/// <returns>The result of <c>OpenTab</c> for the located tab item.</returns>
private T OpenMainMenuTab<T>(UbsWebElement ubsMainMenuWebElement, params string[] tabItemNames) where T : new()
{
    // The tab item is an <a> inside the <div> that follows the menu element.
    var menuLocator = ubsMainMenuWebElement.Selector.GetLocator();
    var textPredicate = XpathHelper.CombineFewTextPartsInOneWithOr(tabItemNames);
    var tabItemXPath = $"{menuLocator}//following-sibling::div//a[{textPredicate}]";

    var tabItem = new UbsWebElement(By.XPath(tabItemXPath));
    return OpenTab<T>(ubsMainMenuWebElement, tabItem);
}
/// <summary>
/// Crawls one news list page (mobile user agent, gb2312): for every list entry that is
/// not stored yet it fetches the detail page, stores the news and its images, and then
/// pauses before handling the next entry.
/// </summary>
/// <param name="newsListUrl">Address of the list page.</param>
/// <param name="newsType">
/// 100, 200 or 300: the entry is treated as an article; 400: the entry is treated as a
/// picture set. Other values only produce the "finished" log line per entry.
/// </param>
/// <returns>
/// Always <c>null</c>; every code path below returns null.
/// NOTE(review): the declared list type is never produced - confirm whether callers rely on it.
/// </returns>
public List <DtoNewsUrlList> NewsUrlGathering(string newsListUrl, int newsType)
{
    try
    {
        Log.Info(newsListUrl + " 抓取开始");
        var strContent = HttpHelper.GetContentByMobileAgent(newsListUrl, Encoding.GetEncoding("gb2312"));
        if (string.IsNullOrWhiteSpace(strContent))
        {
            Log.Info(newsListUrl + " 未抓取到任何内容");
            return(null);
        }
        // Get the list of title entries.
        var strList = XpathHelper.GetInnerHtmlListByXPath(strContent, "//div[@class='leftList']/ul/li");
        if (strList != null && strList.Count > 0)
        {
            foreach (var item in strList)
            {
                // Each entry has its own try/catch so one bad entry does not stop the run.
                try
                {
                    var url = XpathHelper.GetAttrValueByXPath(item, "//a", "href");
                    var title = XpathHelper.GetInnerHtmlByXPath(item, "//a", "");
                    title = StrHelper.FormatHtml(title).Trim();
                    var isHave = DalNews.IsExistsNews(title);
                    // Skip entries whose title is already stored.
                    if (isHave)
                    {
                        continue;
                    }
                    if (newsType == 100 || newsType == 200 || newsType == 300)
                    {
                        #region === 根据详细页地址取新闻内容 ===
                        // Fetch the article content from the detail page address.
                        var news = NewsGathering(url);
                        if (news != null)
                        {
                            news.NewsTypeId = newsType;
                            news.Title = title;
                            news.PubTime = StrHelper.ToDateTime(StrHelper.FormatPubTime(news.PubTime.ToString()));
                            // Store the news; skip the images (and the pause) when the insert fails.
                            var newsId = DalNews.Insert(news);
                            if (newsId < 1)
                            {
                                continue;
                            }
                            // Extract the img elements from the content and store them as media.
                            var mediaList = ImgDeal.GetImgList(news.Contents);
                            if (mediaList != null && mediaList.Count > 0)
                            {
                                // NOTE(review): Contents is changed after the insert above and the
                                // object is not stored again in this method - confirm this is intended.
                                news.Contents = mediaList[0].Description;
                                foreach (var picitem in mediaList)
                                {
                                    picitem.NewsId = newsId;
                                    DalNews.InsertMedia(picitem);
                                }
                            }
                            // Sleep 30-89 seconds to limit the crawl frequency.
                            Random rnd = new Random();
                            var sleepSeconds = rnd.Next(30, 90);
                            Thread.Sleep(sleepSeconds * 1000);
                        }
                        #endregion
                    }
                    if (newsType == 400)
                    {
                        #region === 根据详细页地址取图片内容 ===
                        // Fetch the picture set from the detail page address.
                        var mediaList = NewsPicGathering(url);
                        var news = new DtoNews()
                        {
                            Title = title,
                            FromUrl = url,
                            NewsTypeId = newsType,
                        };
                        if (mediaList != null && mediaList.Count > 0)
                        {
                            // The first picture's description becomes the news content.
                            news.Contents = mediaList[0].Description;
                            // Store the news, then every picture.
                            // NOTE(review): unlike the article branch, newsId is not checked here.
                            var newsId = DalNews.Insert(news);
                            foreach (var picitem in mediaList)
                            {
                                picitem.NewsId = newsId;
                                DalNews.InsertMedia(picitem);
                            }
                        }
                        // Sleep 30-89 seconds to limit the crawl frequency.
                        Random rnd = new Random();
                        var sleepSeconds = rnd.Next(30, 90);
                        Thread.Sleep(sleepSeconds * 1000);
                        #endregion
                    }
                    Log.Info(url + " 抓取完成");
                }
                catch (Exception ex)
                {
                    // (Disabled) logging of the entry content.
                    Log.Error(ex.Message + ex.StackTrace);
                }
            }
        }
        Log.Info(newsListUrl + " 抓取结束");
        return(null);
    }
    catch (Exception ex)
    {
        Log.Error(ex.Message + ex.StackTrace);
    }
    return(null);
}
/// <summary>
/// Downloads one news detail page (gb2312) and extracts body, publish time, author and
/// the first image of the body into a <c>DtoNews</c>.
/// </summary>
/// <param name="newsUrl">Address of the detail page.</param>
/// <returns>
/// The populated news object (title left empty, source fixed to the site name, logo
/// URLs set to the first image or empty), or <c>null</c> when any step throws; the
/// failure is written to the error log.
/// </returns>
public DtoNews NewsGathering(string newsUrl)
{
    try
    {
        var pageHtml = HttpHelper.GetContent(newsUrl, Encoding.GetEncoding("gb2312"));

        // Article body.
        var body = XpathHelper.GetInnerHtmlByXPath(pageHtml, "//div[@id='Cnt-Main-Article-QQ']", "").Trim();

        // Publish time.
        var publishedRaw = XpathHelper.GetInnerHtmlByXPath(pageHtml, "//span[@class='article-time']", "");
        var published = StrHelper.FormatHtml(publishedRaw).Trim();

        // First image of the body doubles as the logo.
        var imageUrls = XpathHelper.GetAttrValueListByXPath(body, "//img", "src");
        string logo = "";
        if (imageUrls != null && imageUrls.Count > 0)
        {
            logo = imageUrls[0];
        }

        // Author.
        var writerRaw = XpathHelper.GetInnerHtmlByXPath(pageHtml, "//div[@id='C-Main-Article-QQ']/div[1]/div/div[1]/span[5]", "");
        var writer = StrHelper.FormatHtml(writerRaw).Trim();

        body = DealContent(body);

        return new DtoNews
        {
            Contents = body,
            Title = "",
            PubTime = StrHelper.ToDateTime(published),
            FromUrl = newsUrl,
            FromSiteName = "腾讯佛学",
            Author = writer,
            CreateTime = DateTime.Now,
            IsShow = 1,
            LogoOriginalUrl = logo,
            LogoUrl = logo
        };
    }
    catch (Exception ex)
    {
        Log.Error(newsUrl + " 错误:" + ex.Message + ex.StackTrace);
        return null;
    }
}
/// <summary>
/// Reads one application configuration value: the app setting named
/// <paramref name="pConfigName"/> supplies the lookup expression, which is then resolved
/// against the configuration file at <c>_configUrl</c>.
/// </summary>
/// <param name="pConfigName">Key of the app setting that holds the lookup expression.</param>
/// <returns>Whatever <c>XpathHelper.GetValue</c> returns for that expression.</returns>
/// <remarks>A missing app setting results in a <see cref="System.NullReferenceException"/>.</remarks>
internal static string ReadCfg(string pConfigName)
{
    string lookupExpression = ConfigurationManager.AppSettings[pConfigName].ToString();
    string resolved = XpathHelper.GetValue(_configUrl, lookupExpression);
    return resolved;
}
// NOTE(review): absolute developer-machine path carried over unchanged from the original
// code; it should come from the application's resources - confirm and replace.
private const string DeleteIconPath = @"E:\Project\C#\SimpleCrawlProject\SimpleCrawlApp\Resources\1108658.png";

/// <summary>
/// Adds one grid row per HTML element found for the selection.
/// </summary>
/// <param name="geckofxWebbrowerType">
/// <c>General</c> labels the rows "抓取文本", <c>UrlModel</c> labels them "抓取详情页链接";
/// any other value (including <c>ListDetails</c>) adds no rows.
/// </param>
/// <param name="dgv">Grid that receives the rows.</param>
/// <param name="geckoWebBrowser">Browser whose document is queried when a single element is given.</param>
/// <param name="ele">
/// Selected elements. With exactly one element, its short XPath is evaluated against the
/// document and all matching elements are used; otherwise the given elements are used
/// as they are. A null array adds no rows.
/// </param>
public void InsertDataGridRow(GeckofxWebbrowerType geckofxWebbrowerType, DataGridView dgv, GeckoWebBrowser geckoWebBrowser, params GeckoElement[] ele)
{
    if (ele == null)
    {
        return;
    }

    // Collect the HTML elements the rows are built from.
    List<GeckoHtmlElement> geckoHtmlElementList;
    if (ele.Length == 1)
    {
        var xpathTxt = xpathHelper.GetSmallXpath(ele[0]);
        var nodes = geckoWebBrowser.DomDocument.EvaluateXPath(xpathTxt).GetNodes();

        // OfType drops nodes that are not elements instead of passing nulls on.
        var elements = nodes.OfType<GeckoElement>().ToArray();
        geckoHtmlElementList = XpathHelper.FindHtmlTxt(true, elements);
    }
    else
    {
        geckoHtmlElementList = XpathHelper.FindHtmlTxt(true, ele);
    }

    // The browsing type only decides the text of the "field type" column.
    string fieldTypeText;
    if (geckofxWebbrowerType.Equals(GeckofxWebbrowerType.General))
    {
        fieldTypeText = "抓取文本";
    }
    else if (geckofxWebbrowerType.Equals(GeckofxWebbrowerType.UrlModel))
    {
        fieldTypeText = "抓取详情页链接";
    }
    else
    {
        // ListDetails (and anything else) adds nothing.
        return;
    }

    AppendFieldRows(dgv, geckoHtmlElementList, fieldTypeText);
}

/// <summary>
/// Appends one row per element: field name, text content, field type, delete icon,
/// the element itself and its short XPath.
/// </summary>
private void AppendFieldRows(DataGridView dgv, List<GeckoHtmlElement> elements, string fieldTypeText)
{
    if (elements == null || elements.Count == 0)
    {
        return;
    }

    // Loaded once per call and shared by all rows instead of re-reading the file per row.
    Image deleteIcon = Image.FromFile(DeleteIconPath);

    foreach (GeckoHtmlElement element in elements)
    {
        DataGridViewRow row = new DataGridViewRow();

        // Field name: running number taken from the instance counter.
        row.Cells.Add(new DataGridViewTextBoxCell { Value = "字段" + (count++) });
        // Text captured from the element.
        row.Cells.Add(new DataGridViewTextBoxCell { Value = element.TextContent });
        // Field type label.
        row.Cells.Add(new DataGridViewTextBoxCell { Value = fieldTypeText });
        // Delete icon.
        row.Cells.Add(new DataGridViewImageCell { Value = deleteIcon });
        // Cell carrying the browser element itself (described as hidden in the original comments).
        row.Cells.Add(new DataGridViewTextBoxCell { Value = element });
        // Cell carrying the element's short XPath (also described as hidden).
        row.Cells.Add(new DataGridViewTextBoxCell { Value = xpathHelper.GetSmallXpath(element) });

        dgv.Rows.Add(row);
    }
}
/// <summary>
/// Reads the picture list of a picture-set page. The data is taken from the inline
/// script that mentions <c>G_listdata</c>, not from the rendered HTML.
/// </summary>
/// <param name="newsUrl">Address of the picture-set page.</param>
/// <returns>
/// One entry per picture, numbered from 1 in <c>Orders</c>; <c>Id</c> temporarily carries
/// the total number of pictures. Returns <c>null</c> when the script or its array
/// cannot be found or when any step throws; both cases are written to the error log.
/// </returns>
public List<DtoNewsMedia> NewsPicGatheringOne(string newsUrl)
{
    try
    {
        var list = new List<DtoNewsMedia>();

        var strNewContent = HttpHelper.GetContentByMobileAgent(newsUrl, Encoding.UTF8);
        var strScriptList = XpathHelper.GetInnerHtmlListByXPath(strNewContent, "/html/body/script");

        // Pick the first script block that mentions G_listdata.
        var strScript = "";
        if (strScriptList != null)
        {
            foreach (var str in strScriptList)
            {
                if (str != null && str.Contains("G_listdata"))
                {
                    strScript = str;
                    break;
                }
            }
        }

        // Cut out the array literal. The end is the bracket that balances the opening
        // one, so nested arrays and ']' inside titles no longer truncate the JSON.
        var start = strScript.IndexOf('[');
        var end = -1;
        if (start >= 0)
        {
            end = FindClosingBracket(strScript, start);
            if (end < 0)
            {
                // Unbalanced text: fall back to the first ']' after the opening bracket.
                end = strScript.IndexOf(']', start);
            }
        }

        if (start < 0 || end < 0)
        {
            Log.Error(newsUrl + " 错误:" + "G_listdata array not found");
            return null;
        }

        var strJson = strScript.Substring(start, end - start + 1);
        var imgList = JsonConvert.DeserializeObject<List<DtoIfengImg>>(strJson);

        if (imgList != null && imgList.Count > 0)
        {
            var order = 1;
            foreach (var img in imgList)
            {
                list.Add(new DtoNewsMedia
                {
                    // A missing title must not discard the whole picture set.
                    Description = (img.title ?? "").Trim(),
                    Orders = order,
                    PicUrl = img.img,
                    PicOriginalUrl = img.originalimg,
                    IsShow = 1,
                    // Id is used temporarily to record the total picture count.
                    Id = imgList.Count
                });
                order++;
            }
        }

        return list;
    }
    catch (Exception ex)
    {
        Log.Error(newsUrl + " 错误:" + ex.Message + ex.StackTrace);
    }

    return null;
}

/// <summary>
/// Returns the index of the ']' that balances the '[' at <paramref name="openIndex"/>,
/// ignoring brackets inside single- or double-quoted strings; -1 when there is none.
/// </summary>
private static int FindClosingBracket(string text, int openIndex)
{
    var depth = 0;
    var quote = '\0';
    for (var i = openIndex; i < text.Length; i++)
    {
        var c = text[i];
        if (quote != '\0')
        {
            if (c == '\\')
            {
                // Skip the escaped character.
                i++;
            }
            else if (c == quote)
            {
                quote = '\0';
            }
            continue;
        }

        if (c == '"' || c == '\'')
        {
            quote = c;
        }
        else if (c == '[')
        {
            depth++;
        }
        else if (c == ']')
        {
            depth--;
            if (depth == 0)
            {
                return i;
            }
        }
    }

    return -1;
}
/// <summary>
/// Adds one "抓取文本" grid row per HTML element found for the selection.
/// </summary>
/// <param name="dgv">Grid that receives the rows.</param>
/// <param name="geckoWebBrowser">Browser whose document is queried when a single element is given.</param>
/// <param name="ele">
/// Selected elements. With exactly one element, its short XPath is evaluated against the
/// document and all matching nodes are used; otherwise the given elements are used as
/// they are.
/// </param>
public void InsertDataGridRow(DataGridView dgv, GeckoWebBrowser geckoWebBrowser, params GeckoElement[] ele)
{
    // Decide which elements the HTML lookup starts from.
    GeckoElement[] candidates = ele;
    if (ele.Length == 1)
    {
        var smallXpath = xpathHelper.GetSmallXpath(ele[0]);
        var matches = geckoWebBrowser.DomDocument.EvaluateXPath(smallXpath).GetNodes();
        candidates = matches.Select(node => node as GeckoElement).ToArray();
    }

    List<GeckoHtmlElement> htmlElements = XpathHelper.FindHtmlTxt(true, candidates);

    foreach (GeckoHtmlElement htmlElement in htmlElements)
    {
        var gridRow = new DataGridViewRow();

        // Field name: running number taken from the instance counter.
        gridRow.Cells.Add(new DataGridViewTextBoxCell { Value = "字段" + (count++) });

        // Text captured from the element.
        gridRow.Cells.Add(new DataGridViewTextBoxCell { Value = htmlElement.TextContent });

        // Field type label.
        gridRow.Cells.Add(new DataGridViewTextBoxCell { Value = "抓取文本" });

        // Delete icon, read from disk for every row.
        gridRow.Cells.Add(new DataGridViewImageCell { Value = Image.FromFile(@"E:\Project\C#\SimpleCrawlProject\SimpleCrawlApp\Resources\1108658.png") });

        // Cell carrying the browser element itself (described as hidden in the original comments).
        gridRow.Cells.Add(new DataGridViewTextBoxCell { Value = htmlElement });

        // Cell carrying the element's short XPath (also described as hidden).
        gridRow.Cells.Add(new DataGridViewTextBoxCell { Value = xpathHelper.GetSmallXpath(htmlElement) });

        // Insert the finished row into the grid.
        dgv.Rows.Add(gridRow);
    }
}