/// <summary>
/// Builds the string form of this object by concatenating, in order, the results of
/// <c>GetStartTag()</c>, <c>GetAttributeTag()</c>, <c>_Element.ToString()</c> and <c>GetEndTag()</c>.
/// </summary>
/// <returns>The concatenated text.</returns>
public override string ToString()
{
    StringBuilder markup = new StringBuilder(GetStartTag());

    // Pieces are appended left to right, so the helper calls run in the same order they appear.
    markup.Append(GetAttributeTag())
          .Append(_Element.ToString())
          .Append(GetEndTag());

    return markup.ToString();
}
/// <summary>
/// Builds an <see cref="ExamItem"/> from the HTML element of a single exam question.
/// </summary>
/// <param name="moduleId">Value stored in the result's <c>Module_Id</c>.</param>
/// <param name="item">Element containing the question markup.</param>
/// <returns>
/// The populated item, or <c>null</c> when <paramref name="item"/> contains no
/// <c>td[width]</c> element.
/// </returns>
/// <remarks>
/// Any exception raised while the fields are being extracted is reported through
/// <c>WriteLog</c> (element text plus exception message) and then rethrown unchanged.
/// </remarks>
private ExamItem BuildEntity(int moduleId, IHtmlElement item)
{
    const string idSelector = @"td[width]";
    const string answerPrefix = "答案:";

    if (!item.Exists(idSelector))
    {
        return null;
    }

    var idElement = item.FindFirst(idSelector);
    if (idElement == null)
    {
        return null;
    }

    try
    {
        // The id is the first run of digits in the id cell. When there is none, match.Value is
        // empty and Convert.ToInt32 throws FormatException, which is logged and rethrown below.
        var match = Regex.Match(idElement.InnerText(), @"\d+");
        int id = Convert.ToInt32(match.Value.Trim());

        string title = idElement.Parent()
                                .FindFirst(@".MsoNormal>span")
                                .InnerText()
                                .Trim()
                                .RemoveHtml()
                                .RemoveHtmlEncode();
        string answer = item.FindFirst("#answer").InnerText().Trim();

        // The question type is whatever follows the first "、" in the .st_title text.
        string typeHeading = item.FindFirst(".st_title").InnerText();
        string examType = Regex.Match(typeHeading, @"(?<=、).*").Value.Trim();

        // Ordinal comparison guarantees the matched prefix is exactly answerPrefix.Length
        // characters long, which is the amount removed by Substring.
        if (answer.StartsWith(answerPrefix, StringComparison.Ordinal))
        {
            answer = answer.Substring(answerPrefix.Length).Trim();
        }

        // An unknown question type is only logged; the item is still created with ItemType = 0.
        var itemType = ItemTypeService.GetByText(examType);
        if (itemType == null)
        {
            logger.Info(string.Format("未匹配的题目类别[{0}],ExamId=[{1}]", examType, id));
        }

        return new ExamItem()
        {
            Id = id,
            Title = title,
            Answer = answer,
            OriginalHtml = item.InnerHtml(),
            Module_Id = moduleId,
            ItemType = itemType != null ? itemType.Id : 0,
            CreateTime = DateTime.Now,
            DelFlag = false
        };
    }
    catch (Exception ex)
    {
        WriteLog(item.ToString(), ex.Message);
        throw;
    }
}
/// <summary>
/// Renders this object as a <c>&lt;caption&gt;</c> element: the opening tag with an optional
/// <c>align</c> attribute and the text from <c>GetAttributeString()</c>, then
/// <c>_Element.ToString()</c>, then the closing tag followed by a newline.
/// </summary>
/// <returns>The caption markup.</returns>
public override string ToString()
{
    StringBuilder markup = new StringBuilder("<caption");

    // The align attribute is written only when _Align has a value.
    if (_Align != null)
    {
        markup.Append(" align=\"").Append(_Align).Append("\"");
    }

    markup.Append(GetAttributeString()).Append(">");
    markup.Append(_Element.ToString()).Append("</caption>\n");

    return markup.ToString();
}
/// <summary>
/// Builds an <see cref="ExamItem"/> from the HTML element of a single exam question.
/// </summary>
/// <param name="moduleId">Value stored in the result's <c>Module_Id</c>.</param>
/// <param name="item">Element containing the question markup.</param>
/// <returns>The populated item.</returns>
/// <remarks>
/// Any exception raised while the fields are being extracted is reported through
/// <c>WriteLog</c> (element text plus exception message) and then rethrown unchanged.
/// </remarks>
private ExamItem BuildEntity(int moduleId, IHtmlElement item)
{
    const string answerPrefix = "答案:";

    try
    {
        // The id is the first run of digits in the first "tr[valign]>td" cell. When there is
        // none, match.Value is empty and Convert.ToInt32 throws FormatException.
        var idElement = item.FindFirst(@"tr[valign]>td");
        var match = Regex.Match(idElement.InnerText(), @"\d+");
        int id = Convert.ToInt32(match.Value.Trim());

        // The title is the text of the element that follows the id cell.
        string title = idElement.NextElement()
                                .InnerText()
                                .Trim()
                                .RemoveHtml()
                                .RemoveHtmlEncode();
        string answer = item.FindFirst("div[id]").InnerText().Trim().RemoveHtmlEncode();

        // The question type is the text of the second td (index 1) found under the element
        // that precedes this item.
        string examType = item.PreviousElement()
                              .Descendants("td")
                              .ElementAt(1)
                              .InnerText()
                              .Trim()
                              .RemoveHtml()
                              .RemoveHtmlEncode();

        // Ordinal comparison guarantees the matched prefix is exactly answerPrefix.Length
        // characters long, which is the amount removed by Substring.
        if (answer.StartsWith(answerPrefix, StringComparison.Ordinal))
        {
            answer = answer.Substring(answerPrefix.Length).Trim();
        }

        // An unknown question type is only logged; the item is still created with ItemType = 0.
        var itemType = ItemTypeService.GetByText(examType);
        if (itemType == null)
        {
            logger.Info(string.Format("未匹配的题目类别[{0}],ExamId=[{1}]", examType, id));
        }

        return new ExamItem()
        {
            Id = id,
            Title = title,
            Answer = answer,
            OriginalHtml = item.InnerHtml(),
            Module_Id = moduleId,
            ItemType = itemType != null ? itemType.Id : 0,
            CreateTime = DateTime.Now,
            DelFlag = false
        };
    }
    catch (Exception ex)
    {
        WriteLog(item.ToString(), ex.Message);
        throw;
    }
}
/// <summary>
/// Walks every <c>.list-items</c> element of <paramref name="document"/>, converts each one into
/// a <c>Class1</c> record and appends it to <paramref name="L_Class"/>.
/// </summary>
/// <param name="document">Parsed page to scan.</param>
/// <param name="L_Class">List that receives one record per processed element.</param>
/// <remarks>
/// Nothing is caught here: an element with fewer <c>div</c> texts, fewer space-separated parts or
/// a different anchor layout than the fixed indexes below expect makes the method throw.
/// The counter <c>r</c>, declared outside this method, is incremented once per added record.
/// </remarks>
private void GetUrlText_2(IHtmlDocument document, List<Class1> L_Class)
{
    IEnumerable<IHtmlElement> listings = document.Find(".list-items");
    foreach (var listing in listings)
    {
        Class1 record = new Class1();
        IHtmlElement anchor = listing.FindFirst("a");

        // Image flag: 1 only when a non-empty src exists and "default.jpg" is not found
        // at an index greater than zero; 0 in every other case.
        string imageSrc = "";
        if (listing.Exists("img"))
        {
            imageSrc = listing.FindFirst("img").Attribute("src").Value();
        }

        if (imageSrc.Length > 0 && !(imageSrc.IndexOf("default.jpg") > 0))
        {
            record.Image_Count = 1;
        }
        else
        {
            record.Image_Count = 0;
        }

        // Collect the text of every div inside the listing; fields are picked by position.
        List<string> divTexts = new List<string>();
        foreach (var div in listing.Find("div"))
        {
            divTexts.Add(div.InnerText());
        }

        record.TextName = divTexts[2];
        record.SumMoney = divTexts[4];
        record.Quyu = "赶集";
        record.Allpm = divTexts[1];
        record.Address = divTexts[0];
        record.href = anchor.Attribute("href").Value().Trim();

        // NOTE(review): this lookup result is never read afterwards; the call is kept as it was.
        IEnumerable<IHtmlElement> spans = anchor.Find("span");

        // The date text sits inside an HTML comment in the anchor markup. The comment delimiters
        // are replaced with sentinels, the comment body is cut out, and then the text between
        // the first '>' and the following '<' is taken.
        string anchorMarkup = anchor.ToString().Replace("<!--", "stu1").Replace("-->", "stp2");
        string afterCommentStart = anchorMarkup.Substring(anchorMarkup.IndexOf("stu1") + 4);
        string commentBody = afterCommentStart.Substring(0, afterCommentStart.IndexOf("stp2"));
        string afterFirstTag = commentBody.Substring(commentBody.IndexOf(">") + 1);
        record.datetime = afterFirstTag.Substring(0, afterFirstTag.IndexOf("<"));

        // Area: part 6 of the space-separated Allpm text with '㎡' removed, falling back to
        // part 5 when part 6 is blank.
        string[] parts = record.Allpm.Split(' ');
        string areaText = parts[6].Replace('㎡', ' ').Trim();
        if (areaText.Length <= 0)
        {
            areaText = parts[5].Replace('㎡', ' ').Trim();
        }
        double area = Convert.ToDouble(areaText);

        // Price per unit of area: SumMoney (with "万元" removed) divided by the area, times 10000.
        double total = Convert.ToDouble(record.SumMoney.Replace("万元", "").Trim());
        double perUnit = total / area;
        record.PingMoney = "≈" + Convert.ToInt32(perUnit * 10000).ToString();

        if (record.Image_Count > 0)
        {
            record.Image_str = "有";
        }
        else
        {
            record.Image_str = string.Empty;
        }

        L_Class.Add(record);
        r++;
    }
}
/// <summary>
/// Creates the <see cref="ExamItem"/> described by one exam-question HTML element.
/// </summary>
/// <param name="moduleId">Value copied into <c>Module_Id</c> of the result.</param>
/// <param name="item">Element that holds the markup of the question.</param>
/// <returns>The newly created item.</returns>
/// <remarks>
/// Extraction failures are not handled here: the exception message and the element text are
/// passed to <c>WriteLog</c>, and the original exception is rethrown.
/// </remarks>
private ExamItem BuildEntity(int moduleId, IHtmlElement item)
{
    const string answerPrefix = "答案:";

    try
    {
        // First run of digits in the first "tr[valign]>td" cell is the question id.
        // With no digits, match.Value is empty and Convert.ToInt32 throws FormatException.
        var idCell = item.FindFirst(@"tr[valign]>td");
        var match = Regex.Match(idCell.InnerText(), @"\d+");
        int id = Convert.ToInt32(match.Value.Trim());

        // Title text comes from the element right after the id cell.
        string title = idCell.NextElement().InnerText().Trim().RemoveHtml().RemoveHtmlEncode();
        string answer = item.FindFirst("div[id]").InnerText().Trim().RemoveHtmlEncode();

        // Question type: text of the td at index 1 under the element preceding this item.
        var typeCell = item.PreviousElement().Descendants("td").ElementAt(1);
        string examType = typeCell.InnerText().Trim().RemoveHtml().RemoveHtmlEncode();

        // Compare ordinally so the text removed by Substring is exactly the matched prefix;
        // a culture-sensitive match is not guaranteed to span answerPrefix.Length characters.
        if (answer.StartsWith(answerPrefix, StringComparison.Ordinal))
        {
            answer = answer.Substring(answerPrefix.Length).Trim();
        }

        // Unrecognised question types are logged and stored as ItemType = 0.
        var itemType = ItemTypeService.GetByText(examType);
        if (itemType == null)
        {
            logger.Info(string.Format("未匹配的题目类别[{0}],ExamId=[{1}]", examType, id));
        }

        var model = new ExamItem()
        {
            Id = id,
            Title = title,
            Answer = answer,
            OriginalHtml = item.InnerHtml(),
            Module_Id = moduleId,
            ItemType = itemType != null ? itemType.Id : 0,
            CreateTime = DateTime.Now,
            DelFlag = false
        };
        return model;
    }
    catch (Exception ex)
    {
        WriteLog(item.ToString(), ex.Message);
        throw;
    }
}
/// <summary>
/// Returns the string form of <c>rootElement</c>.
/// </summary>
/// <returns>Whatever <c>rootElement.ToString()</c> produces.</returns>
public override string ToString()
{
    string rootText = rootElement.ToString();
    return rootText;
}
/// <summary>
/// Collects image addresses from the JSON carried by <c>div</c> tags whose class is exactly
/// "rg_meta". The first content node of each such tag is parsed as a string-to-string
/// dictionary; when its "ity" entry is "jpg" or "png" and an "ou" entry is present, the "ou"
/// value is added to <c>_imagesInPage</c>.
/// This is the way a top level Google Image search provides images.
/// </summary>
/// <param name="tag">The parsed HTML document tag.</param>
/// <remarks>
/// Best effort: an exception raised while handling one tag skips that tag only, and an
/// exception raised by the search or the class filter ends the scan. Neither is reported.
/// </remarks>
private void GetImagesFromDivTags(HtmlTag tag)
{
    try
    {
        // Find looks recursively through the entire DOM tree.
        IEnumerable<HtmlTag> divTags = tag.Find(t => string.Equals(t.TagName, "div"));

        foreach (HtmlTag divTag in divTags)
        {
            // Tags without a class attribute are skipped. This check is deliberately outside
            // the per-tag try block so a failure here ends the whole scan.
            if (!divTag.ContainsKey("class"))
            {
                continue;
            }

            try
            {
                string cssClass = divTag["class"];
                if (cssClass != "rg_meta")
                {
                    continue;
                }

                if (divTag.Contents == null || !divTag.Contents.Any())
                {
                    continue;
                }

                IHtmlElement firstContent = divTag.Contents.First();
                Dictionary<string, string> meta =
                    JsonConvert.DeserializeObject<Dictionary<string, string>>(firstContent.ToString());

                // Only entries typed as jpg or png are of interest.
                string imageType;
                if (!meta.TryGetValue("ity", out imageType))
                {
                    continue;
                }

                bool isWantedType = imageType == "jpg" || imageType == "png";
                if (!isWantedType)
                {
                    continue;
                }

                string address;
                if (meta.TryGetValue("ou", out address))
                {
                    _imagesInPage.Add(address);
                }
            }
            catch (Exception)
            {
                // ignored
            }
        }
    }
    catch (Exception)
    {
        // ignored
    }
}
/// <summary>
/// Creates the <see cref="ExamItem"/> described by one exam-question HTML element.
/// </summary>
/// <param name="moduleId">Value copied into <c>Module_Id</c> of the result.</param>
/// <param name="item">Element that holds the markup of the question.</param>
/// <returns>
/// The newly created item, or <c>null</c> when no <c>td[width]</c> element exists inside
/// <paramref name="item"/>.
/// </returns>
/// <remarks>
/// Extraction failures are not handled here: the exception message and the element text are
/// passed to <c>WriteLog</c>, and the original exception is rethrown.
/// </remarks>
private ExamItem BuildEntity(int moduleId, IHtmlElement item)
{
    const string idSelector = @"td[width]";
    const string answerPrefix = "答案:";

    if (!item.Exists(idSelector))
    {
        return null;
    }

    var idCell = item.FindFirst(idSelector);
    if (idCell == null)
    {
        return null;
    }

    try
    {
        // First run of digits in the id cell is the question id. With no digits,
        // match.Value is empty and Convert.ToInt32 throws FormatException.
        var match = Regex.Match(idCell.InnerText(), @"\d+");
        int id = Convert.ToInt32(match.Value.Trim());

        var titleSpan = idCell.Parent().FindFirst(@".MsoNormal>span");
        string title = titleSpan.InnerText().Trim().RemoveHtml().RemoveHtmlEncode();
        string answer = item.FindFirst("#answer").InnerText().Trim();

        // Question type: the part of the .st_title text after the first "、".
        string typeHeading = item.FindFirst(".st_title").InnerText();
        string examType = Regex.Match(typeHeading, @"(?<=、).*").Value.Trim();

        // Compare ordinally so the text removed by Substring is exactly the matched prefix;
        // a culture-sensitive match is not guaranteed to span answerPrefix.Length characters.
        if (answer.StartsWith(answerPrefix, StringComparison.Ordinal))
        {
            answer = answer.Substring(answerPrefix.Length).Trim();
        }

        // Unrecognised question types are logged and stored as ItemType = 0.
        var itemType = ItemTypeService.GetByText(examType);
        if (itemType == null)
        {
            logger.Info(string.Format("未匹配的题目类别[{0}],ExamId=[{1}]", examType, id));
        }

        var model = new ExamItem()
        {
            Id = id,
            Title = title,
            Answer = answer,
            OriginalHtml = item.InnerHtml(),
            Module_Id = moduleId,
            ItemType = itemType != null ? itemType.Id : 0,
            CreateTime = DateTime.Now,
            DelFlag = false
        };
        return model;
    }
    catch (Exception ex)
    {
        WriteLog(item.ToString(), ex.Message);
        throw;
    }
}