/// <summary>
/// Builds a copy of <c>Content</c> in which every non-blank text node has been replaced by its
/// translation into <paramref name="targetLangCode"/>, stores the copy in
/// <c>TranslatedContent</c> and returns it.
/// </summary>
/// <param name="targetLangCode">Target language code placed in the <c>tl</c> query parameter.</param>
/// <returns>An asynchronous operation producing the translated node.</returns>
public IAsyncOperation<HtmlNode> TranslateAsync(string targetLangCode)
{
    return AsyncInfo.Run(async token =>
    {
        var node = HtmlNode.CreateNode(Content.OuterHtml);

        // Snapshot the text nodes so that rewriting them below cannot disturb the enumeration.
        var textNodes = node.Descendants("#text").ToList();
        foreach (var item in textNodes)
        {
            // Stop issuing further requests once the caller has cancelled the operation.
            token.ThrowIfCancellationRequested();

            var data = item.GetInnerText();
            if (string.IsNullOrWhiteSpace(data))
            {
                // Nothing to translate; skipping also preserves the original whitespace
                // and avoids a pointless network round-trip.
                continue;
            }

            var uri = "https://translate.google.cn/translate_a/single?client=gtx&dt=t&ie=UTF-8&oe=UTF-8"
                + $"&sl=auto&tl={Uri.EscapeDataString(targetLangCode ?? string.Empty)}&q={Uri.EscapeDataString(data)}";
            var transRetHtml = await transClient.GetStringAsync(new Uri(uri));

            var obj = JsonConvert.DeserializeObject<JArray>(transRetHtml);
            var objarr = (obj != null && obj.Count > 0) ? obj[0] as JArray : null;
            if (objarr == null)
            {
                // The service returned no sentence list for this fragment; keep the original text.
                continue;
            }

            // Each entry of the first array is read as [translatedSegment, ...]; join the segments.
            var translated = string.Concat(objarr.Select(a => a[0].ToString()));
            item.InnerHtml = HtmlEntity.Entitize(translated);
        }

        TranslatedContent = node;
        return node;
    });
}
/** -------------------------------------------------------------------- **/

/// <summary>
/// Decodes HTML entities in <paramref name="Text"/> and then passes the result through
/// <c>CleanText</c>. A null or empty input is returned unchanged.
/// </summary>
/// <param name="Text">The HTML text to clean.</param>
/// <returns>The cleaned text, or the input itself when it is null or empty.</returns>
public static string CleanHtmlText(string Text)
{
    if (string.IsNullOrEmpty(Text))
    {
        return Text;
    }

    string decoded = Text;

    try
    {
        decoded = HtmlEntity.DeEntitize(Text);
    }
    catch (Exception ex)
    {
        // Decoding failures are only reported; cleaning continues with the undecoded text.
        DebugMsgStatic(string.Format("CleanBodyText: {0}", ex.Message));
    }

    return CleanText(Text: decoded);
}
/// <summary>
/// Builds a JSON request for <paramref name="resource"/> and executes it synchronously.
/// </summary>
/// <remarks>
/// The previous implementation started <c>ExecuteAsync</c> and returned immediately, so callers
/// always received the empty placeholder response instead of the real one. The request is now
/// executed with the blocking <c>Execute&lt;T&gt;</c> call and its response is returned.
/// </remarks>
/// <typeparam name="T">Type the response body is deserialized into.</typeparam>
/// <param name="http_method">"GET", "POST" or "DELETE" (case-insensitive); anything else is sent as GET.</param>
/// <param name="resource">Resource path of the request.</param>
/// <param name="data">Key/value pairs sent as query-string parameters, or as the JSON body for POST.</param>
/// <returns>The response returned by the client.</returns>
private IRestResponse<T> _request<T>(string http_method, string resource, dict data) where T : new()
{
    var request = new RestRequest() { Resource = resource, RequestFormat = DataFormat.Json };

    // Set the HTTP method for this request.
    bool sendAsJsonBody = false;
    switch (http_method.ToUpperInvariant())
    {
        case "POST":
            request.Method = Method.POST;
            sendAsJsonBody = true;
            break;

        case "DELETE":
            request.Method = Method.DELETE;
            break;

        default:
            // "GET" and any unrecognised verb.
            request.Method = Method.GET;
            break;
    }

    if (sendAsJsonBody)
    {
        // POST carries the data only as a serialized JSON body, never as query parameters.
        request.AddParameter("application/json", request.JsonSerializer.Serialize(data), ParameterType.RequestBody);
    }
    else
    {
        // Add the parameters to the request as query-string values.
        foreach (KeyValuePair<string, string> kvp in data)
        {
            request.AddParameter(kvp.Key, HtmlEntity.Convert(kvp.Value), ParameterType.QueryString);
        }
    }

    return client.Execute<T>(request);
}
/// <summary>
/// Checks whether an attribute value is valid for the given policy attribute.
/// </summary>
/// <remarks>
/// The value is trimmed and HTML-entity-decoded first. It is valid when it equals one of
/// <c>AllowedValues</c> (case-insensitively) or when the whole value matches one of the
/// <c>AllowedRegExp</c> patterns.
/// </remarks>
/// <param name="attr">The policy attribute describing the allowed values and patterns.</param>
/// <param name="value">The raw attribute value to validate.</param>
/// <returns><c>true</c> when the value is allowed; otherwise <c>false</c>.</returns>
public static bool ValidateAttribute(PolicyAttribute attr, string value)
{
    if (attr == null || string.IsNullOrWhiteSpace(value))
    {
        return false;
    }

    value = HtmlEntity.DeEntitize(value.Trim());

    // Check whether the value is one of the explicitly allowed literals.
    if (attr.AllowedValues != null)
    {
        foreach (string allowedValue in attr.AllowedValues)
        {
            if (allowedValue != null && allowedValue.Equals(value, StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }
        }
    }

    // Check whether the value matches one of the allowed regular expressions.
    if (attr.AllowedRegExp != null)
    {
        foreach (string ptn in attr.AllowedRegExp)
        {
            if (ptn == null)
            {
                // Mirror the null tolerance applied to AllowedValues above.
                continue;
            }

            // Anchor the pattern as a whole. Plain "^" + ptn + "$" concatenation is wrong for
            // alternations ("a|b" would become "^a|b$" and accept "axyz") and for patterns that
            // end in an escaped "\$". Wrapping in a non-capturing group is harmless for patterns
            // that are already anchored.
            string pattern = "^(?:" + ptn + ")$";
            if (Regex.IsMatch(value, pattern))
            {
                return true;
            }
        }
    }

    return false;
}
/// <summary>
/// Queries the site's <c>search.php</c> page and yields one link per result row.
/// </summary>
/// <param name="query">The name of the release to search for.</param>
/// <returns>Lazily produced sequence of the download links found; empty when no rows match.</returns>
public override IEnumerable<Link> Search(string query)
{
    var searchUrl = Site + "search.php?search=" + Utils.EncodeURL(query);
    var html = Utils.GetHTML(searchUrl, request: req =>
    {
        req.Accept = "*/*";
        req.Headers[HttpRequestHeader.AcceptLanguage] = "en";
        req.AutomaticDecompression = DecompressionMethods.None;
    });

    // The second cell of every table row carries the release name.
    var releaseCells = html.DocumentNode.SelectNodes("//table/tr/td[2]");
    if (releaseCells == null)
    {
        yield break;
    }

    foreach (var cell in releaseCells)
    {
        var link = new Link(this);

        link.Release = HtmlEntity.DeEntitize(cell.InnerText).Trim();
        link.Quality = FileNames.Parser.ParseQuality(link.Release);
        link.Size    = HtmlEntity.DeEntitize(cell.GetTextValue("../td[4]")).Trim().Replace("M", " MB");
        link.Infos   = HtmlEntity.DeEntitize(cell.GetTextValue("../td[5]")).Trim();

        // A nuke reason, when present, is embedded as red text inside the cell's title attribute.
        var titleAttribute = cell.GetAttributeValue("title");
        if (titleAttribute.Contains("Nuked"))
        {
            var nukeMatch = Regex.Match(HtmlEntity.DeEntitize(titleAttribute), "<font color='red'>([^<]+)");
            if (nukeMatch.Success)
            {
                link.Infos += ", Nuked: " + nukeMatch.Groups[1].Value;
            }
        }

        yield return link;
    }
}
/// <summary>
/// Recursively appends a plain-text rendering of <paramref name="nodes"/> to <paramref name="builder"/>.
/// </summary>
/// <param name="builder">Receives the generated text.</param>
/// <param name="state">Current output state; set to <c>StartLine</c> whenever a break is emitted.</param>
/// <param name="nodes">The nodes to render, in order.</param>
static void Plain(StringBuilder builder, ref ToPlainTextState state, IEnumerable<HtmlNode> nodes)
{
    foreach (var current in nodes)
    {
        // Text nodes: decode entities and feed the characters to Process.
        if (current is HtmlTextNode textNode)
        {
            Process(builder, ref state, HtmlEntity.DeEntitize(textNode.Text).ToCharArray());
            continue;
        }

        var tagName = current.Name.ToLower();

        // <br> always forces a break.
        if (tagName == "br")
        {
            builder.AppendDoubleLine();
            state = ToPlainTextState.StartLine;
            continue;
        }

        // Non-visible tags contribute nothing, including their children.
        if (NonVisibleTags.Contains(tagName))
        {
            continue;
        }

        // Inline tags are transparent: only their children are rendered.
        if (InlineTags.Contains(tagName))
        {
            Plain(builder, ref state, current.ChildNodes);
            continue;
        }

        // Everything else is treated as a block: break before and after, unless already at a line start.
        if (state != ToPlainTextState.StartLine)
        {
            builder.AppendDoubleLine();
            state = ToPlainTextState.StartLine;
        }

        Plain(builder, ref state, current.ChildNodes);

        if (state != ToPlainTextState.StartLine)
        {
            builder.AppendDoubleLine();
            state = ToPlainTextState.StartLine;
        }
    }
}
/// <summary>
/// Reads the article overview entries from an index page and determines the URL of the next page.
/// </summary>
/// <param name="url">URL of the index page (not used by this implementation).</param>
/// <param name="doc">The parsed index page.</param>
/// <param name="nextPageUrl">
/// Receives the URL of the link labelled with the current page number plus one, or <c>null</c>
/// when no such link is found.
/// </param>
/// <returns>
/// The overview entries, or <c>null</c> when the "maincontent" container, its list items, or any
/// usable entry is missing.
/// </returns>
public override ArticalOverview[] ReadIndexPage(string url, HtmlDocument doc, out string nextPageUrl)
{
    nextPageUrl = null;

    var container = doc.GetElementbyId("maincontent");
    if (container == null)
    {
        return null;
    }

    var liTags = Helper.AllChild(container, "li");
    if (liTags == null)
    {
        return null;
    }

    List<ArticalOverview> data = new List<ArticalOverview>();
    foreach (var li in liTags)
    {
        var aLink = Helper.AnyChild(li, "a");
        if (aLink == null)
        {
            // A list item without a link is not an article entry; skip it instead of crashing.
            continue;
        }

        data.Add(new ArticalOverview()
        {
            LinkOfActualArtical = Helper.CombindUrl(webDir, aLink.GetAttributeValue("href", "")),
            Title = HtmlEntity.DeEntitize(aLink.InnerText)
        });
    }

    var pageContainer = Helper.AnyChild(container, "td", new Dictionary<string, string>() { ["align"] = "center" });
    if (pageContainer != null)
    {
        try
        {
            var currentPageNode = Helper.AnyChild(pageContainer, "font");
            var allLinks = Helper.AllChild(pageContainer, "a");

            int currentPageIndex;
            if (currentPageNode != null
                && allLinks != null
                && int.TryParse(currentPageNode.InnerText, out currentPageIndex))
            {
                var nextPageLabel = (currentPageIndex + 1).ToString();
                foreach (var link in allLinks)
                {
                    if (link.InnerText == nextPageLabel)
                    {
                        nextPageUrl = Helper.CombindUrl(webDir, link.GetAttributeValue("href", ""));
                    }
                }
            }
        }
        catch (Exception)
        {
            // Pagination is best-effort: the entries already collected are still returned
            // and nextPageUrl keeps whatever value it had when the failure occurred.
        }
    }

    return data.Count > 0 ? data.ToArray() : null;
}
/// <summary>
/// Extracts the text of an HTML fragment and decodes its HTML entities.
/// </summary>
/// <param name="html">The HTML to extract text from.</param>
/// <returns>The decoded text, or an empty string when <paramref name="html"/> is null or empty.</returns>
private static string GetTextFromHtml(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return "";
    }

    var htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(html);

    var sb = StringExtensions.StringBuilderPool.Rent();
    try
    {
        GetTextFromNodes(sb, htmlDoc.DocumentNode.ChildNodes);
        return HtmlEntity.DeEntitize(sb.ToString());
    }
    finally
    {
        // Hand the builder back even when extraction or decoding throws, so it is not lost to the pool.
        // NOTE(review): assumes the pool resets a returned builder - confirm.
        StringExtensions.StringBuilderPool.Return(sb);
    }
}
/// <summary>
/// Returns the entity-decoded inner HTML of the first child of the "title" info node whose
/// first attribute has the value "pretty".
/// </summary>
/// <returns>
/// The decoded title, or an empty string when the title node is missing (a message box is shown
/// in that case) or when it has no matching child.
/// </returns>
public string GetPrettyTitle()
{
    // The result is not used here; the call is kept because the original makes it before the lookup.
    GetPageInfo();

    HtmlNode titleNode = FindInfoNode("title");
    if (titleNode == null)
    {
        MessageBox.Show("Couldn't find title.");
        return "";
    }

    var children = titleNode.ChildNodes;
    for (int index = 0; index < children.Count; index++)
    {
        var candidate = children[index];
        bool isPretty = candidate.Attributes.Count > 0 && candidate.Attributes[0].Value == "pretty";
        if (isPretty)
        {
            return HtmlEntity.DeEntitize(candidate.InnerHtml);
        }
    }

    return "";
}
/// <summary>
/// Collects the visible text of an HTML document into a single string.
/// </summary>
/// <remarks>
/// Every non-blank text node outside of <c>script</c> and <c>style</c> elements is
/// entity-decoded, trimmed and appended, followed by a single space.
/// </remarks>
/// <param name="htmlDoc">The document to read.</param>
/// <returns>The concatenated text, or an empty string when the document has no matching text nodes.</returns>
/// <exception cref="ArgumentNullException"><paramref name="htmlDoc"/> is <c>null</c>.</exception>
private string ParseHtmlDocumentText(HtmlDocument htmlDoc)
{
    if (htmlDoc == null)
    {
        throw new ArgumentNullException(nameof(htmlDoc));
    }

    var textNodes = htmlDoc.DocumentNode.SelectNodes(
        "./descendant-or-self::*[not(self::script or self::style)]/text()[not(normalize-space(.)='')]");
    if (textNodes == null)
    {
        // SelectNodes yields null rather than an empty collection when nothing matches.
        return string.Empty;
    }

    var text = new System.Text.StringBuilder();
    foreach (HtmlNode node in textNodes)
    {
        if (!string.IsNullOrEmpty(node.InnerText.Trim()))
        {
            var temp = HtmlEntity.DeEntitize(node.InnerText);
            text.Append(temp.Trim()).Append(' ');
        }
    }

    return text.ToString();
}
/// <summary>
/// Reads the "most played heroes" list of a player's Dotabuff page.
/// </summary>
/// <param name="playerId">Identifier of the player whose page is loaded.</param>
/// <returns>
/// A dictionary mapping each listed hero to its stat; empty when the page contains no
/// most-played-heroes list.
/// </returns>
public Dictionary<IHero, IStat> FetchMostPlayedHeroes(string playerId)
{
    HtmlNode root = mainController.HtmlDocumentController.GetDotabuffPlayerRoot(playerId);
    IEnumerable<HtmlNode> mostPlayedHeroesNode = root.SelectNodes(PlayerPath.MostPlayedHeroes.List.Value);
    Dictionary<IHero, IStat> mostPlayedHeros = new Dictionary<IHero, IStat>();

    // SelectNodes yields null when nothing matches; treat that as an empty list.
    // The count is taken once instead of on every loop iteration.
    int heroCount = mostPlayedHeroesNode == null ? 0 : mostPlayedHeroesNode.Count();

    // List positions in the combined path are 1-based.
    for (int i = 1; i <= heroCount; i++)
    {
        HtmlNode heroNode = root.SelectSingleNode(
            mainController.CombinePathWithListCount(PlayerPath.MostPlayedHeroes.Hero.Value, i));

        string heroReference = HtmlEntity
            .DeEntitize(heroNode.Attributes[HtmlAttributes.LastPlayedMatches.ReferenceAttribute.Value].Value)
            .Replace(HtmlAttributes.Hero.Replace.Value, "");

        Hero hero = GetHero(heroReference);
        Stat stat = statController.MapHtmlNode(root, i);
        mostPlayedHeros.Add(hero, stat);
    }

    return mostPlayedHeros;
}
/// <summary>
/// Concatenates the entity-decoded inner text of every sibling that follows
/// <paramref name="startNode"/>, stopping at the first sibling whose outer HTML starts with
/// <paramref name="htmlSeparator"/>.
/// </summary>
/// <param name="startNode">Node whose following siblings are read; the node itself is not included.</param>
/// <param name="htmlSeparator">Markup prefix that marks the start of the next section.</param>
/// <returns>The concatenated text; empty when there are no siblings before the separator.</returns>
public static string GetAllInnerText(HtmlNode startNode, string htmlSeparator)
{
    // A builder avoids re-copying the accumulated text for every sibling.
    var allInnerText = new System.Text.StringBuilder();

    for (var curNode = startNode.NextSibling; curNode != null; curNode = curNode.NextSibling)
    {
        // Break out of the loop when the next section is reached. Markup is compared
        // ordinally so the result does not depend on the current culture.
        if (curNode.OuterHtml.StartsWith(htmlSeparator, System.StringComparison.Ordinal))
        {
            break;
        }

        allInnerText.Append(HtmlEntity.DeEntitize(curNode.InnerText));
    }

    return allInnerText.ToString();
}
/// <summary>
/// Extracts the CSRF token from the <c>data-react-props</c> attribute of the page's
/// <c>Header</c> react component.
/// </summary>
/// <param name="html">The page markup.</param>
/// <returns>The value of <c>session.csrf.token</c> as a string.</returns>
public string GetToken(string html)
{
    var document = new HtmlDocument();
    document.LoadHtml(html);

    var headerDiv = document.DocumentNode.SelectSingleNode("//div[@data-react-class='Header']");

    // The attribute holds entity-encoded JSON.
    var propsJson = HtmlEntity.DeEntitize(headerDiv.GetAttributeValue("data-react-props", ""));

    dynamic props = JObject.Parse(propsJson);

    // "session" is itself re-parsed from its string form before the token is read.
    dynamic session = JObject.Parse(props.session.ToString());

    return session.csrf.token.ToString();

    // An earlier approach, kept for reference, read the token from the page's
    // input[name='authenticity_token'] element instead:
    //var token = doc.DocumentNode.SelectSingleNode("//input[@name='authenticity_token']");
    //return token.GetAttributeValue("content", "");
}
/// <summary>
/// Downloads <paramref name="url"/> using the shared cookie container, decodes HTML entities in
/// the response body and runs <c>findErrors</c> on it.
/// </summary>
/// <param name="url">Address of the protected page to request.</param>
/// <returns>The entity-decoded response body.</returns>
public static string getHTTP(string url)
{
    isError = false;
    ErrorMessage = "";

    // Send the stored cookies along with the request for the protected page.
    HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create(url);
    webRequest.ContentType = "application/x-www-form-urlencoded";
    webRequest.CookieContainer = cookies;

    string responseData;

    // Dispose the response and the reader even when reading fails, so the connection is released.
    // NOTE(review): UTF-7 is an unusual choice for web content; kept as-is - confirm it is intended.
    using (WebResponse response = webRequest.GetResponse())
    using (StreamReader responseReader = new StreamReader(response.GetResponseStream(), Encoding.UTF7))
    {
        responseData = HtmlEntity.DeEntitize(responseReader.ReadToEnd());
    }

    findErrors(responseData);
    return responseData;
}
/// <summary>
/// Reads the "most popular ability build" table of a hero's Dotabuff page.
/// </summary>
/// <param name="heroReference">Reference of the hero whose page is loaded.</param>
/// <returns>
/// One <c>AbilityBuild</c> per ability row, each holding the ability and the levels at which it
/// is chosen; empty when the page has no ability-build rows or icons.
/// </returns>
internal List<AbilityBuild> FetchMostPopularAbilityBuild(string heroReference)
{
    HtmlNode root = HtmlDocumentController.GetDotabuffHeroRoot(heroReference);
    IList<HtmlNode> mostPopularAbilityBuildNodes = root.SelectNodes(HeroPath.MostPopularAbilitBuild.Abilities.Value);
    IList<HtmlNode> mostPopularAbilityBuildIconNodes = root.SelectNodes(HeroPath.MostPopularAbilitBuild.Icons.Value);

    List<AbilityBuild> abilityBuildList = new List<AbilityBuild>();

    // SelectNodes yields null when nothing matches; there is nothing to build in that case.
    if (mostPopularAbilityBuildNodes == null || mostPopularAbilityBuildIconNodes == null)
    {
        return abilityBuildList;
    }

    List<Ability> abilities = FetchAbilities(heroReference);

    for (int i = 0; i < mostPopularAbilityBuildNodes.Count; i++)
    {
        // The icon's alt text is matched against the names of the hero's abilities.
        string abilityName = HtmlEntity.DeEntitize(mostPopularAbilityBuildIconNodes[i].Attributes["alt"].Value);

        AbilityBuild abilityBuild = new AbilityBuild();
        abilityBuild.Ability = abilities.First(ability => ability.Name.Contains(abilityName));

        // Every "entry choice" cell of the row holds a level at which the ability is picked.
        abilityBuild.LevelBuild = mostPopularAbilityBuildNodes[i]
            .Descendants("div")
            .Where(d => d.Attributes.Contains("class") && d.Attributes["class"].Value.Contains("entry choice"))
            .Select(levelBuildNode => int.Parse(levelBuildNode.InnerText))
            .ToList();

        abilityBuildList.Add(abilityBuild);
    }

    return abilityBuildList;
}
/// <summary>
/// Converts a puzzle page into a <c>Problem</c>: the articles become markdown, the answers
/// following each article are collected, and the title is taken from the first <c>h2</c>.
/// </summary>
/// <param name="year">Year of the problem.</param>
/// <param name="day">Day of the problem.</param>
/// <param name="url">Source URL, written into the first line of the markdown.</param>
/// <param name="html">The page markup.</param>
/// <param name="input">The puzzle input, stored unchanged.</param>
/// <returns>The populated problem. The title is empty when the page has no <c>h2</c>.</returns>
public static Problem Parse(int year, int day, string url, string html, string input)
{
    var document = new HtmlDocument();
    document.LoadHtml(html);

    var md = new System.Text.StringBuilder($"original source: [{url}]({url})\n");
    var answers = new System.Text.StringBuilder();

    // SelectNodes yields null when the page has no <article>; treat that as "no content".
    var articles = document.DocumentNode.SelectNodes("//article");
    if (articles != null)
    {
        foreach (var article in articles)
        {
            md.Append(UnparseList("", article) + "\n");

            // The answer, if any, is the <code> inside the first following <p> sibling
            // that has a <code> child and whose text mentions "answer".
            var answerNode = article.NextSibling;
            while (answerNode != null && !(
                answerNode.Name == "p" &&
                answerNode.SelectSingleNode("./code") != null &&
                answerNode.InnerText.Contains("answer")))
            {
                answerNode = answerNode.NextSibling;
            }

            var code = answerNode?.SelectSingleNode("./code");
            if (code != null)
            {
                answers.Append(code.InnerText + "\n");
            }
        }
    }

    var heading = document.DocumentNode.SelectSingleNode("//h2");
    var title = heading == null ? "" : HtmlEntity.DeEntitize(heading.InnerText);

    // Reduce a heading of the form "<prefix>: <name> ---" to just the name.
    var match = Regex.Match(title, ".*: (.*) ---");
    if (match.Success)
    {
        title = match.Groups[1].Value;
    }

    return new Problem
    {
        Year = year,
        Day = day,
        Title = title,
        ContentMd = md.ToString(),
        Input = input,
        Answers = answers.ToString()
    };
}
/// <summary>
/// Downloads the product page and adds every product found in the <c>div</c> elements whose
/// class equals <c>category.CategoryUrl</c> to <c>ProductsCollection</c>.
/// </summary>
/// <remarks>
/// <c>IsLoading</c> is set for the duration of the call and always reset afterwards. Exceptions
/// propagate to the caller.
/// </remarks>
/// <param name="category">Category whose <c>CategoryUrl</c> selects the product containers.</param>
public async Task SetData(ProductsCategory category)
{
    try
    {
        IsLoading = true;

        string html;

        // Dispose the client once the page has been downloaded so its connection is released.
        using (HttpClient httpClient = new HttpClient())
        {
            html = await httpClient.GetStringAsync(@"http://piranigroup.com.pk/product-2/");
        }

        var htmlDocument = new HtmlDocument();
        htmlDocument.LoadHtml(html);

        var nodes = htmlDocument.DocumentNode.Descendants("div")
            .Where(x => x.GetAttributeValue("class", "").Equals(category.CategoryUrl));

        foreach (var htmlNode in nodes)
        {
            var product = new Product
            {
                // Name comes from the first <h2>, image from the first <img>'s src; either may be absent.
                ProductName = HtmlEntity.DeEntitize(htmlNode.Descendants("h2").FirstOrDefault()?.InnerText),
                ProductImage = HtmlEntity.DeEntitize(htmlNode.Descendants("img").FirstOrDefault()?
                    .ChildAttributes("src")
                    .FirstOrDefault()?
                    .Value)
            };
            ProductsCollection.Add(product);
        }
    }
    finally
    {
        IsLoading = false;
    }
}
/// <summary>
/// Builds the album contract for a post page: title, cover picture, info box data and track list.
/// </summary>
/// <param name="doc">The parsed post page.</param>
/// <param name="url">URL of the page, stored as the source URL.</param>
/// <returns>The assembled album contract; parts that are missing from the page are left at their defaults.</returns>
private MikuDbAlbumContract GetAlbumData(HtmlDocument doc, string url)
{
    var data = new ImportedAlbumDataContract();
    var root = doc.DocumentNode;

    // Title: the link inside the post's heading.
    var titleLink = root.SelectSingleNode(".//h2[@class='posttitle']/a");
    string title = titleLink != null ? HtmlEntity.DeEntitize(titleLink.InnerText) : string.Empty;

    // Cover picture: the image in the first cell of the post table's first row.
    PictureDataContract coverPicture = null;
    var coverImage = root.SelectSingleNode(".//div[@class='postcontent']/table/tr[1]/td[1]/a/img");
    if (coverImage != null)
    {
        coverPicture = DownloadCoverPicture(coverImage.Attributes["src"].Value);
    }

    // Info box: the second cell of the same row.
    var infoBox = root.SelectSingleNode(".//div[@class='postcontent']/table/tr[1]/td[2]");
    HtmlNode rowAfterInfoBox = null;
    if (infoBox != null)
    {
        ParseInfoBox(data, infoBox);
        rowAfterInfoBox = infoBox.ParentNode.NextSibling;
    }

    // Track list: located starting from the node after the info box's row, when there is one.
    var trackListRow = FindTracklistRow(doc, rowAfterInfoBox);
    if (trackListRow != null)
    {
        ParseTrackList(data, trackListRow);
    }

    return new MikuDbAlbumContract
    {
        Title = title,
        Data = data,
        CoverPicture = coverPicture,
        SourceUrl = url
    };
}
/// <summary>
/// Resolves a Bilibili video URL by querying the XML view API and reading title, author and
/// thumbnail from the response.
/// </summary>
/// <param name="url">The video URL.</param>
/// <param name="getTitle">Not used by this implementation.</param>
/// <returns>
/// A success result, or an error result when the URL has no recognisable ID, the API request
/// fails with a <c>WebException</c>, or the response lacks a title element.
/// </returns>
public override VideoUrlParseResult ParseByUrl(string url, bool getTitle)
{
    var id = GetIdByUrl(url);

    if (string.IsNullOrEmpty(id))
    {
        return VideoUrlParseResult.CreateError(url, VideoUrlParseResultType.NoMatcher, "No matcher");
    }

    var apiUrl = string.Format("http://api.bilibili.tv/view?type=xml&appkey={0}&id={1}", AppConfig.BilibiliAppKey, id);
    var apiRequest = WebRequest.Create(apiUrl);

    XDocument info;

    try
    {
        using (var apiResponse = apiRequest.GetResponse())
        {
            using (var body = apiResponse.GetResponseStream())
            {
                info = XDocument.Load(body);
            }
        }
    }
    catch (WebException x)
    {
        log.WarnException("Unable to load Bilibili URL " + url, x);
        var parseError = new VideoParseException("Unable to load Bilibili URL: " + x.Message, x);
        return VideoUrlParseResult.CreateError(url, VideoUrlParseResultType.LoadError, parseError);
    }

    var titleNode  = info.XPathSelectElement("/info/title");
    var thumbNode  = info.XPathSelectElement("/info/pic");
    var authorNode = info.XPathSelectElement("/info/author");

    // The title is mandatory; thumbnail and author fall back to an empty string.
    if (titleNode == null)
    {
        return VideoUrlParseResult.CreateError(url, VideoUrlParseResultType.LoadError, "No title element");
    }

    var title = HtmlEntity.DeEntitize(titleNode.Value);
    var thumb = thumbNode == null ? string.Empty : thumbNode.Value;
    var author = authorNode == null ? string.Empty : authorNode.Value;

    var titleResult = VideoTitleParseResult.CreateSuccess(title, author, thumb);
    return VideoUrlParseResult.CreateOk(url, PVService.Bilibili, id, titleResult);
}
/// <summary>
/// Creates a gallery from string-typed metadata fields, decoding HTML entities in the textual
/// fields and parsing the numeric ones with the invariant culture.
/// </summary>
/// <remarks>
/// NOTE(review): the chained constructor parses <paramref name="token"/> and
/// <paramref name="filecount"/> before the body runs, so the <paramref name="error"/> check
/// below only takes effect when those two arguments parse successfully - confirm this is intended.
/// </remarks>
/// <param name="gid">Gallery id, passed to the chained constructor.</param>
/// <param name="error">When non-null, an exception with this message is thrown.</param>
/// <param name="token">Gallery token; null or whitespace is replaced by "0" before parsing.</param>
/// <param name="title">Entity-encoded title.</param>
/// <param name="title_jpn">Entity-encoded Japanese title.</param>
/// <param name="category">Category name; names missing from the lookup map to <c>Category.Unspecified</c>.</param>
/// <param name="thumb">Thumbnail address, formatted by <c>ThumbClient.FormatThumbUri</c>.</param>
/// <param name="uploader">Entity-encoded uploader name.</param>
/// <param name="posted">Post time as Unix seconds.</param>
/// <param name="filecount">Number of files, passed to the chained constructor.</param>
/// <param name="filesize">File size.</param>
/// <param name="expunged">Whether the gallery is expunged; <c>Available</c> is its negation.</param>
/// <param name="rating">Average rating.</param>
/// <param name="torrentcount">Number of torrents.</param>
/// <param name="tags">Tag strings, each parsed with <c>Tag.Parse</c>.</param>
internal Gallery(
    long gid,
    string error = null,
    string token = "0",
    string title = null,
    string title_jpn = null,
    string category = null,
    string thumb = null,
    string uploader = null,
    string posted = null,
    string filecount = null,
    long filesize = 0,
    bool expunged = true,
    string rating = null,
    string torrentcount = null,
    string[] tags = null)
    : this(
        gid,
        EToken.Parse(token.CoalesceNullOrWhiteSpace("0")),
        int.Parse(filecount, NumberStyles.Integer, CultureInfo.InvariantCulture))
{
    if (error != null)
    {
        throw new Exception(error);
    }

    Available = !expunged;

    Title = HtmlEntity.DeEntitize(title);
    TitleJpn = HtmlEntity.DeEntitize(title_jpn);

    Category = _CategoriesForRestApi.TryGetValue(category, out var knownCategory)
        ? knownCategory
        : Category.Unspecified;

    Uploader = HtmlEntity.DeEntitize(uploader);

    var postedSeconds = long.Parse(posted, NumberStyles.Integer, CultureInfo.InvariantCulture);
    Posted = DateTimeOffset.FromUnixTimeSeconds(postedSeconds);

    FileSize = filesize;
    Expunged = expunged;

    Rating.AverageScore = double.Parse(rating, NumberStyles.Number, CultureInfo.InvariantCulture);
    TorrentCount = int.Parse(torrentcount, NumberStyles.Integer, CultureInfo.InvariantCulture);

    Tags = new TagCollection(this, tags.Select(rawTag => Tag.Parse(rawTag)));
    ThumbUri = ThumbClient.FormatThumbUri(thumb);
}
/*
 *
 * public static string CleanText ( string Text )
 * {
 *
 *   string CleanedText = "";
 *
 *   if( !string.IsNullOrEmpty( Text ) )
 *   {
 *
 *     CleanedText = Text;
 *
 *     CleanedText = Regex.Replace( CleanedText, @"<!.+?>", " ", RegexOptions.Singleline );
 *     CleanedText = Regex.Replace( CleanedText, @"<!--.+?-->", " ", RegexOptions.Singleline );
 *     CleanedText = Regex.Replace( CleanedText, @"[\s]+", " ", RegexOptions.Singleline );
 *     CleanedText = Regex.Replace( CleanedText, @"(?<![\w\d])([^\p{L}\p{N}\p{Sc}]+)", " ", RegexOptions.Singleline );
 *     CleanedText = Regex.Replace( CleanedText, @"([^\p{L}\p{N}\p{Sc}]+)(?![\w\d])", " ", RegexOptions.Singleline );
 *     CleanedText = Regex.Replace( CleanedText, @"([\p{P}\p{Sc}]+)(?![\w\d])", " ", RegexOptions.Singleline );
 *     CleanedText = Regex.Replace( CleanedText, @"[\s]+", " ", RegexOptions.Singleline );
 *
 *     CleanedText = CleanedText.Trim();
 *
 *   }
 *
 *   return( CleanedText );
 *
 * }
 *
 */

/**************************************************************************/

/// <summary>
/// Decodes HTML entities in <paramref name="Text"/>, collapses every run of whitespace into a
/// single space and removes trailing whitespace. A null or empty input is returned unchanged.
/// </summary>
/// <param name="Text">The text to compact.</param>
/// <returns>The compacted text, or the input itself when it is null or empty.</returns>
public static string CompactWhiteSpace(string Text)
{
    if (string.IsNullOrEmpty(Text))
    {
        return Text;
    }

    string compacted = Text;

    try
    {
        compacted = HtmlEntity.DeEntitize(compacted);
    }
    catch (Exception ex)
    {
        // Decoding failures are only reported; compaction continues with the undecoded text.
        DebugMsgStatic(string.Format("CompactWhiteSpace: {0}", ex.Message));
    }

    // Collapse whitespace runs, then drop whatever whitespace is left at the end.
    compacted = Regex.Replace(compacted, @"[\s]+", " ", RegexOptions.Singleline);
    compacted = Regex.Replace(compacted, @"[\s]+$", "", RegexOptions.Singleline);

    // NOTE(review): the first replacement has already turned every line break into a space,
    // so this one appears to have nothing left to match - confirm before removing.
    compacted = Regex.Replace(compacted, @"[\r\n]+", Environment.NewLine, RegexOptions.Singleline);

    return compacted;
}
/// <summary>
/// Builds a <c>WorkAssignment</c> from an assignment node: the id comes from its <c>uo</c>
/// attribute, the placement from its titled <c>div</c>, and the start and end times from its
/// <c>b</c> element, which is split on " - ".
/// </summary>
/// <param name="assignment">The node describing one assignment.</param>
/// <param name="date">Date text that is combined with each time before parsing.</param>
/// <returns>The parsed assignment.</returns>
private WorkAssignment ParseWorkAssignmentFromHtml(HtmlNode assignment, string date)
{
    var idMatch = AssignmentIdRegex.Match(assignment.GetAttributeValue("uo", string.Empty));
    var assignmentId = idMatch.Groups["id"]?.Value;

    var placementNode = assignment.SelectSingleNode("div[@title]");
    var placement = HtmlEntity.DeEntitize(placementNode.InnerText);

    // The <b> element holds "<start> - <end>".
    var timeParts = assignment.SelectSingleNode("b").InnerText.Split(" - ");
    var startText = timeParts[0];
    var endText = timeParts[1];

    var start = DateTime.Parse($"{date} {startText}");
    var end = DateTime.Parse($"{date} {endText}");

    _logger.Info($"Found appointment. Date: {date}, ID: {assignmentId}, Placement: {placement}, Start: {start:t}, End: {end:t}");

    return new WorkAssignment
    {
        Id = assignmentId,
        Start = start,
        End = end,
        Placement = placement
    };
}
/// <summary>
/// Loads the Al Capone menu page, parses its day menus and saves them.
/// </summary>
/// <remarks>
/// The menu table is read in groups of <c>rowPerDay</c> rows: a date row, a soup row, and the
/// remaining rows as foods with a description and a price.
/// </remarks>
/// <exception cref="WeekendEmptyException">The page contains no menu rows.</exception>
private void LoadAlCapone()
{
    const int rowPerDay = 6;

    // The first two rows of every day are the date and the soup; the rest are foods.
    const int firstFoodRow = 2;

    HtmlNode doc = Utils.GetHtmlDoc(Constants.alCaponeUrl).DocumentNode;
    HtmlNode menu = doc.SelectSingleNode("//table[@class='table table-responsive']/tbody");

    // A missing table is handled like a table without rows instead of failing with a
    // NullReferenceException.
    // NOTE(review): assumes an absent table also means "no menu today" - confirm.
    HtmlNodeCollection rows = menu == null ? null : menu.SelectNodes("./tr");
    if (rows == null)
    {
        throw new WeekendEmptyException("Pizzeria Alcapone - Brno:\nV menu nejsou o víkendu žádné položky, vraťe se v pondělí.");
    }

    int daysCount = rows.Count / rowPerDay;
    DayMenu[] dayMenus = new DayMenu[daysCount];

    for (int i = 0; i < daysCount; i++)
    {
        int dayOffset = i * rowPerDay;

        string dateStr = rows[dayOffset].SelectSingleNode("./td/h3").InnerText;
        DateTime date = Utils.ParseDateTime(dateStr);
        string soup = rows[dayOffset + 1].SelectSingleNode("./td[2]/h3").InnerText;

        // Sized from the row layout so the array cannot drift out of sync with rowPerDay.
        Food[] foods = new Food[rowPerDay - firstFoodRow];
        for (int j = firstFoodRow; j < rowPerDay; j++)
        {
            HtmlNode actRow = rows[dayOffset + j];
            string description = HtmlEntity.DeEntitize(actRow.SelectSingleNode("./td[2]/h3").InnerText);
            int price = Utils.ParsePrice(actRow.SelectSingleNode("./td[3]/h3").InnerText);
            foods[j - firstFoodRow] = new Food(description, price);
        }

        dayMenus[i] = new DayMenu(date, soup, foods);
    }

    string restaurantName = GetRestaurantName(doc);
    SaveRestaurant(restaurantName, dayMenus, Restaurants.AlCapone);
}
/// <summary>
/// Renders a quote block, including nested quotes, as text framed by the quote border markers.
/// </summary>
/// <remarks>
/// Nested quotes are rendered recursively and replaced in the document by a <c>title</c>
/// element holding the HTML-encoded result, so they become part of the cell's inner text.
/// </remarks>
/// <param name="quote">The quote container node.</param>
/// <param name="doc">Document used to create the replacement elements.</param>
/// <returns>The rendered quote.</returns>
public static string ProcessQuote(HtmlNode quote, HtmlDocument doc)
{
    RemoveViewPost(quote);

    string quoteContent = Model.Post.GetQuoteBorderTop();

    HtmlNode table = quote.SelectSingleNode("./table");
    HtmlNode td = table == null ? null : table.SelectSingleNode(".//td[@class='alt2']");

    if (td == null)
    {
        // The fallback branch used to read td.InnerText here, which could only ever throw
        // because td is null on this path (and a missing table threw even earlier).
        // NOTE(review): using the quote node's own text as the fallback is an assumption
        // about the original intent - confirm. As before, no bottom border is added here.
        quoteContent += HtmlEntity.DeEntitize(quote.InnerText.Trim()).Trim();
        return quoteContent;
    }

    HtmlNodeCollection quotes = td.SelectNodes("./div[@style='margin:20px; margin-top:5px; ']");
    if (quotes != null)
    {
        foreach (HtmlNode q in quotes)
        {
            string s = ProcessQuote(q, doc);
            HtmlNode newNode = doc.CreateElement("title");
            newNode.InnerHtml = HtmlDocument.HtmlEncode(s);
            q.ParentNode.ReplaceChild(newNode, q);
        }
    }

    // Check whether the quote names the quoted user.
    HtmlNode strongUserName = td.SelectSingleNode(".//strong");
    if (strongUserName != null)
    {
        string quoteUser = "******" + HtmlEntity.DeEntitize(strongUserName.InnerText.Trim()) + "</b>";
        quoteContent += quoteUser + "<br>";

        // Drop the first div child so it is not repeated in the body text below.
        // NOTE(review): presumably this is the header holding the user name - confirm.
        HtmlNode headerDiv = td.Element("div");
        if (headerDiv != null)
        {
            td.RemoveChild(headerDiv);
        }
    }

    quoteContent += HtmlEntity.DeEntitize(td.InnerText.Trim());
    quoteContent += Model.Post.GetQuoteBorderBottom();

    return quoteContent;
}
/// <summary>
/// Loads a product page and reads its description, name, price and image links.
/// </summary>
/// <param name="link">Link of the product page, appended to <c>baseLink</c>.</param>
/// <returns>The populated product detail.</returns>
public ProductDetail GetProductDetail(string link)
{
    HtmlDocument document = HtmlWebSingleton.GetInstance().Load(baseLink + link);

    // Everything of interest lives inside this wrapper element.
    //var detail = document.DocumentNode.QuerySelector(".product-details");
    var detail = document.DocumentNode.QuerySelector(".f-wrap");

    var product = new ProductDetail();

    // Description: inner HTML with new lines inserted and tags stripped, then entity-decoded.
    // NOTE(review): "main_spec" has no leading dot, so it selects an element of that name
    // rather than a class - confirm this is intended.
    //product.Description = HtmlEntity.DeEntitize(detail.QuerySelector(".info .simple-prop").InnerHtml.InsertNewLine().RemoveHtmlTag());
    //product.Description = detail.QuerySelector(".info .simple-prop").InnerHtml;
    var descriptionHtml = detail.QuerySelector(".fs-dtbox main_spec .fs-tsright >li").InnerHtml;
    product.Description = HtmlEntity.DeEntitize(descriptionHtml.InsertNewLine().RemoveHtmlTag());

    var nameNode = detail.QuerySelector("h1");
    product.ProductName = HtmlEntity.DeEntitize(nameNode.InnerText);

    var priceNode = detail.QuerySelector(".product-price span");
    product.Price = priceNode.InnerText;

    var sliderImages = detail.QuerySelectorAll("#slider1_container > div > div img:first-child").ToList();
    foreach (var image in sliderImages)
    {
        product.ImageLinkLst.Add(image.Attributes["src"].Value);
    }

    return product;
}
/// <summary>
/// Parses the options of the <c>subj_id</c> select element into subjects.
/// </summary>
/// <remarks>
/// The subject code is the option's <c>VALUE</c> attribute. The subject name is the option text
/// after its first "-", or the whole text when it contains no "-".
/// </remarks>
/// <param name="content">The page markup.</param>
/// <returns>The subjects found; empty when the select element has no options.</returns>
public List<MyPurdueSubject> ParseHtml(string content)
{
    HtmlDocument document = new HtmlDocument();
    document.LoadHtml(content);
    HtmlNode root = document.DocumentNode;

    var subjects = new List<MyPurdueSubject>();

    HtmlNodeCollection termSelectNodes = root.SelectNodes("//select[@id='subj_id'][1]/option");
    if (termSelectNodes == null)
    {
        // SelectNodes yields null rather than an empty collection when nothing matches.
        return subjects;
    }

    foreach (var node in termSelectNodes)
    {
        var code = HtmlEntity.DeEntitize(node.Attributes["VALUE"].Value).Trim();
        var name = HtmlEntity.DeEntitize(node.InnerText).Trim();

        // The char overload searches ordinally, independent of the current culture.
        name = name.Substring(name.IndexOf('-') + 1);

        subjects.Add(new MyPurdueSubject()
        {
            SubjectCode = code,
            SubjectName = name
        });
    }

    return subjects;
}
/// <summary>
/// Downloads <paramref name="url"/> and returns the page title, preferring the
/// <c>meta[name='title']</c> content over the <c>title</c> element.
/// </summary>
/// <param name="url">Address of the page.</param>
/// <returns>
/// The entity-decoded title; "N/A" when the page has neither element; <c>null</c> when the
/// response is not <c>text/html</c> or anything fails.
/// </returns>
private string GetTitle(string url)
{
    try
    {
        var req = (HttpWebRequest)WebRequest.Create(url);
        req.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.77 Safari/535.7";
        req.AllowAutoRedirect = true;

        // Dispose the response and its stream so the connection is released on every path,
        // including the early return and failures.
        using (var response = req.GetResponse())
        {
            if (!response.ContentType.Contains("text/html"))
            {
                return null;
            }

            var doc = new HtmlDocument();
            using (var stream = response.GetResponseStream())
            {
                doc.Load(stream);
            }

            string title = "N/A";

            var titleMeta = doc.DocumentNode.SelectSingleNode("//meta[@name='title']");
            if (titleMeta != null)
            {
                title = titleMeta.Attributes["content"].Value;
            }
            else
            {
                var node = doc.DocumentNode.SelectSingleNode("//title");
                if (node != null)
                {
                    title = node.InnerText.Trim();
                }
            }

            return HtmlEntity.DeEntitize(title);
        }
    }
    catch (Exception)
    {
        // Title lookup is best-effort: any failure is reported to the caller as "no title".
        return null;
    }
}
/// <summary>
/// Converts the children of an anchor element: images are handed to <c>ParseImage</c>, and
/// non-blank text becomes a run (linked to the anchor's href when it has one) inserted after
/// the last line segment of the current paragraph.
/// </summary>
/// <param name="node">The anchor node.</param>
private void ParseAnchor(HtmlNode node)
{
    if (Logger.IsDebugEnabled)
    {
        Logger.Debug(string.Format("a: {0}, {1}", node.Name, node.InnerText));
    }

    var hrefAttribute = node.Attributes["href"];
    var target = hrefAttribute != null ? hrefAttribute.Value : null;

    foreach (var childNode in node.ChildNodes)
    {
        // A paragraph is created on demand before any child is handled.
        if (_currentParagraph == null)
        {
            _currentParagraph = CreateParagraph();
            //_currentParagraph = new Paragraph();
        }

        var childName = childNode.Name.ToLowerInvariant();

        if (childName == "img")
        {
            ParseImage(childNode);
            continue;
        }

        if (childName != "#text")
        {
            // Other child elements are ignored.
            continue;
        }

        var rawText = childNode.InnerText;
        if (StringUtil.IsNullOrWhitespace(rawText))
        {
            continue;
        }

        var textRun = new Run(HtmlEntity.DeEntitize(rawText));
        if (!StringUtil.IsNullOrWhitespace(target))
        {
            textRun.Link = new Link(target);
        }

        _currentParagraph.LineSegments.Last().InsertAfter(textRun);
    }
}
/// <summary>
/// Recursively collects the XPaths of "Times New Roman" style attributes on span nodes whose
/// entity-decoded inner HTML is empty or whitespace only.
/// </summary>
/// <param name="node">Root of the subtree to inspect.</param>
/// <returns>The XPaths of the matching attributes, for this node first and then its descendants.</returns>
private static List<string> IdentifyNodesWithBulletPoints(HtmlNode node)
{
    var result = new List<string>();

    if (node.Name.Contains("span"))
    {
        var romanStyleAttributes = node.Attributes
            .Where(attribute => attribute.Name.Contains("style") && attribute.Value.Contains("Times New Roman"));

        // An attribute only counts when the span holds nothing but blanks once decoded.
        var blankSpanXPaths = romanStyleAttributes
            .Where(attribute => string.IsNullOrWhiteSpace(HtmlEntity.DeEntitize(node.InnerHtml)))
            .Select(attribute => attribute.XPath);

        result.AddRange(blankSpanXPaths);
    }

    foreach (var child in node.ChildNodes)
    {
        result.AddRange(IdentifyNodesWithBulletPoints(child));
    }

    return result;
}
/// <summary>
/// Returns the entity-decoded text of every text node in <paramref name="html"/>, one node per line.
/// </summary>
/// <param name="html">The HTML to read.</param>
/// <returns>The collected text, or an empty string when the document contains no text nodes.</returns>
private string getTextFromHtml(string html)
{
    HtmlDocument document = new HtmlDocument();
    document.LoadHtml(html);

    StringBuilder collected = new StringBuilder();

    if (document.DocumentNode == null)
    {
        return "";
    }

    var textNodes = document.DocumentNode.SelectNodes("//text()");
    if (textNodes == null)
    {
        return "";
    }

    foreach (HtmlTextNode textNode in textNodes)
    {
        collected.AppendLine(HtmlEntity.DeEntitize(textNode.Text));
    }

    return collected.ToString();
}