public GetAttributeValue ( string name, bool def ) : bool | ||
name | string | The name of the attribute to get. May not be null. |
def | bool | The default value to return if not found. |
return | bool | The attribute's value converted to bool, or def if the attribute is not found. |
/// <summary>
/// Applies <paramref name="value"/> to a form control node.
/// </summary>
/// <remarks>
/// For a <c>select</c> element every child <c>option</c> gets its <c>selected</c> attribute set to
/// "selected" when its <c>value</c> attribute equals <paramref name="value"/>, and to an empty string
/// otherwise. For an <c>input</c> of type <c>radio</c> the <c>checked</c> attribute is set the same
/// way and the radio's own <c>value</c> attribute is left untouched. For every other input type the
/// <c>value</c> attribute is overwritten with <paramref name="value"/>.
/// </remarks>
/// <param name="n">The node to update. A null node is reported as unsupported.</param>
/// <param name="value">The value to select, check or assign.</param>
/// <returns>true when the node is a <c>select</c> or <c>input</c> element; otherwise false.</returns>
public bool SetValue(HtmlNode n, string value)
{
    if (n == null)
    {
        return false;
    }

    if (n.Name == "select")
    {
        // SelectNodes yields null (not an empty collection) when nothing matches.
        var options = n.SelectNodes("option");
        if (options != null)
        {
            foreach (HtmlNode option in options)
            {
                bool isMatch = option.GetAttributeValue("value", "").Equals(value);
                option.SetAttributeValue("selected", isMatch ? "selected" : "");
            }
        }

        return true;
    }

    if (n.Name == "input")
    {
        switch (n.GetAttributeValue("type", ""))
        {
            case "radio":
                // A radio button keeps its own value; only its checked state changes.
                // Overwriting the value here would make every radio in a group identical.
                bool isChosen = n.GetAttributeValue("value", "").Equals(value);
                n.SetAttributeValue("checked", isChosen ? "checked" : "");
                break;
            default:
                n.SetAttributeValue("value", value);
                break;
        }

        return true;
    }

    return false;
}
/// <summary>
/// Replaces a stylesheet link node with an inline <c>style</c> element that contains the
/// downloaded stylesheet text.
/// </summary>
/// <param name="context">Supplies the address used to resolve relative references and the web client used for the download.</param>
/// <param name="node">The node to inspect.</param>
/// <returns>
/// A new <c>style</c> node holding the downloaded content, or <paramref name="node"/> unchanged when
/// its <c>rel</c> attribute is not "stylesheet", it has no <c>href</c>, or the download raises a
/// <see cref="WebException"/>.
/// </returns>
public async Task<HtmlNode> VisitAsync(VisitingContext context, HtmlNode node)
{
    // We're only interested in stylesheets.
    if (node.GetAttributeValue("rel", null) != "stylesheet")
    {
        return node;
    }

    var href = node.GetAttributeValue("href", null);
    if (href == null)
    {
        return node;
    }

    // A protocol-relative reference ("//host/path") is not reliably parsed as a web address by
    // System.Uri, so give it the scheme of the page being visited before parsing.
    if (href.StartsWith("//", StringComparison.Ordinal))
    {
        href = context.Address.Scheme + ":" + href;
    }

    var hrefUri = new Uri(href, UriKind.RelativeOrAbsolute);
    if (!hrefUri.IsAbsoluteUri)
    {
        hrefUri = new Uri(context.Address, hrefUri);
    }

    // Get the stylesheet and insert it inline.
    string content;
    try
    {
        content = await context.WebClient.DownloadAsync(hrefUri);
    }
    catch (WebException)
    {
        // Best effort: keep the original link when the stylesheet cannot be fetched.
        return node;
    }

    return HtmlNode.CreateNode("<style>" + content + "</style>");
}
/// <summary>
/// Initializes a form from a <c>form</c> element of a previously received HTML response.
/// </summary>
/// <param name="formNode">The <c>form</c> element whose attributes and inputs are read. Must not be null.</param>
/// <param name="session">The session stored on the form. Must not be null.</param>
/// <param name="baseUrl">The URL against which the form's <c>action</c> attribute is resolved. Must not be null.</param>
/// <param name="charset">The character set used in the previous response (from which the form originates).</param>
/// <exception cref="ArgumentException"><paramref name="formNode"/> is not a <c>form</c> element.</exception>
public Form(HtmlNode formNode, ISession session, Uri baseUrl, string charset)
{
    Condition.Requires(formNode, "formNode").IsNotNull();
    Condition.Requires(session, "session").IsNotNull();
    Condition.Requires(baseUrl, "baseUrl").IsNotNull();

    bool isFormElement = formNode.Name.Equals("form", StringComparison.OrdinalIgnoreCase);
    if (!isFormElement)
    {
        string message = string.Format("Cannot create HTML form from '{0}' node.", formNode.Name);
        throw new ArgumentException(message);
    }

    // A missing action resolves to the base URL itself; a missing method defaults to "get".
    string actionAttribute = formNode.GetAttributeValue("action", "");
    Action = new Uri(baseUrl, actionAttribute);
    Method = formNode.GetAttributeValue("method", "get");
    Session = session;
    BaseUrl = baseUrl;
    ResponseCharset = charset;

    string encodingAttribute = formNode.GetAttributeValue("enctype", null);
    if (encodingAttribute == null)
    {
        EncodingType = MediaType.ApplicationFormUrlEncoded;
    }
    else
    {
        EncodingType = new MediaType(encodingAttribute);
    }

    AcceptCharset = formNode.GetAttributeValue("accept-charset", null);

    // The collections are created before the inputs are parsed.
    Values = new Hashtable();
    SubmitElements = new List<SubmitElement>();
    ParseInputs(formNode);
}
/// <summary>
/// Reads a page number from the <c>value</c> attribute of a node.
/// </summary>
/// <param name="threadNode">The node carrying the <c>value</c> attribute; may be null.</param>
/// <returns>
/// The parsed page number, or 1 when the node is null, the attribute is missing or empty, or the
/// attribute does not hold a valid 32-bit integer.
/// </returns>
private static int GetPageNumber(HtmlNode threadNode)
{
    const int DefaultPage = 1;

    if (threadNode == null)
    {
        return DefaultPage;
    }

    // Read the attribute once; a malformed value falls back to the default instead of throwing.
    string rawValue = threadNode.GetAttributeValue("value", string.Empty);
    int pageNumber;
    return int.TryParse(rawValue, out pageNumber) ? pageNumber : DefaultPage;
}
/// <summary>
/// Renders a node as a Markdown image of the form <c>![alt](src "title")</c>.
/// </summary>
/// <param name="node">The node whose <c>alt</c> and <c>src</c> attributes are read; a missing attribute becomes an empty string.</param>
/// <returns>The Markdown image text. The quoted title part is left out when the extracted title is empty.</returns>
public override string Convert(HtmlNode node)
{
    string altText = node.GetAttributeValue("alt", string.Empty);
    string source = node.GetAttributeValue("src", string.Empty);

    // Starts as the bare title and becomes the ` "title"` suffix only when there is one.
    string titleSuffix = this.ExtractTitle(node);
    if (titleSuffix.Length > 0)
    {
        titleSuffix = string.Format(" \"{0}\"", titleSuffix);
    }
    else
    {
        titleSuffix = "";
    }

    return string.Format("![{0}]({1}{2})", altText, source, titleSuffix);
}
/// <summary>
/// Builds a <see cref="Note"/> from a node.
/// </summary>
/// <remarks>
/// The grade is the node's trimmed inner text, the name comes from the <c>title</c> attribute, the
/// id is the number following <c>jegyId=</c> in the <c>href</c> attribute, and the note type is the
/// last character of the <c>class</c> attribute parsed as an integer and cast to <see cref="NoteType"/>.
/// Parsing throws when the <c>href</c> contains no <c>jegyId</c> number or the last class character
/// is not a digit.
/// </remarks>
/// <param name="Node">The node to read.</param>
/// <returns>The parsed note.</returns>
public static Note Parse(HtmlNode Node)
{
    string gradeText = Node.InnerText.Trim();
    string noteName = Node.GetAttributeValue("title", "Névtelen jegy");

    string link = Node.GetAttributeValue("href", "");
    Match idMatch = Regex.Match(link, "jegyId=(?<id>[0-9]+)", RegexOptions.IgnoreCase);
    string idText = idMatch.Groups["id"].Value;
    int noteId = int.Parse(idText);

    string cssClass = Node.GetAttributeValue("class", "jegy2");
    string typeDigit = cssClass.Substring(cssClass.Length - 1, 1);
    NoteType noteType = (NoteType)int.Parse(typeDigit);

    return new Note(noteId, gradeText, noteName, noteType);
}
/// <summary>
/// Returns the attribute of <paramref name="node"/> that holds the target for the filter's node type.
/// </summary>
/// <param name="node">The node to read.</param>
/// <param name="filter">The filter whose node type selects the attribute.</param>
/// <returns>
/// The <c>src</c> attribute for image filters, the <c>href</c> attribute for link filters, and an
/// empty string for any other node type or when the attribute is missing.
/// </returns>
public static string GetTargetFromFilter(HtmlNode node, IFilter filter)
{
    var nodeType = filter.GetNodeType();

    string attributeName;
    if (nodeType == NodeType.Image)
    {
        attributeName = "src";
    }
    else if (nodeType == NodeType.Link)
    {
        attributeName = "href";
    }
    else
    {
        return "";
    }

    return node.GetAttributeValue(attributeName, "");
}
/// <summary>
/// Parses a forum post in a thread and stores the result in this instance's properties.
/// </summary>
/// <remarks>
/// A missing <c>postbody</c> cell leaves <c>PostHtml</c> empty, and a missing <c>postlinks</c> cell
/// marks the post as not belonging to the current user, instead of failing with a
/// <see cref="NullReferenceException"/>.
/// </remarks>
/// <param name="postNode">The post HTML node.</param>
public void Parse(HtmlNode postNode)
{
    User = ForumUserEntity.FromPost(postNode);

    HtmlNode postDateNode = postNode.Descendants()
        .FirstOrDefault(node => node.GetAttributeValue("class", string.Empty).Equals("postdate"));
    string postDateString = postDateNode == null ? string.Empty : postDateNode.InnerText;
    if (postDateString != null)
    {
        PostDate = postDateString.WithoutNewLines().Trim();
    }

    PostIndex = ParseInt(postNode.GetAttributeValue("data-idx", string.Empty));

    // The id looks like "post123" or "#post123"; strip both markers before parsing.
    // Removing "#" from an id that has none is a no-op, so one branch covers both shapes.
    string postId = postNode.GetAttributeValue("id", string.Empty);
    if (!string.IsNullOrEmpty(postId) && (postId.Contains("#") || postId.Contains("post")))
    {
        PostId = Int64.Parse(postId.Replace("post", string.Empty).Replace("#", string.Empty));
    }
    else
    {
        PostId = 0;
    }

    HtmlNode postBodyNode = postNode.Descendants("td")
        .FirstOrDefault(node => node.GetAttributeValue("class", string.Empty).Equals("postbody"));
    if (postBodyNode != null)
    {
        this.FixQuotes(postBodyNode);
        PostHtml = postBodyNode.InnerHtml;
    }
    else
    {
        PostHtml = string.Empty;
    }

    HtmlNode postRow = postNode.Descendants("tr").FirstOrDefault();
    if (postRow != null)
    {
        HasSeen = postRow.GetAttributeValue("class", string.Empty).Contains("seen");
    }

    // The post belongs to the current user when its link cell contains an image with alt="Edit".
    HtmlNode profileLinksNode = postNode.Descendants("td")
        .FirstOrDefault(node => node.GetAttributeValue("class", string.Empty).Equals("postlinks"));
    User.IsCurrentUserPost = profileLinksNode != null
        && profileLinksNode.Descendants("img")
            .Any(node => node.GetAttributeValue("alt", string.Empty).Equals("Edit"));
}
/// <summary>
/// Collects the text and the <c>href</c>, <c>rel</c> and <c>title</c> attributes of an anchor.
/// </summary>
/// <param name="anchor">The anchor node to analyse.</param>
/// <returns>An analysis with an empty message list; attributes that are absent are passed as null.</returns>
static AnchorAnalysis AnalyseAnchor(HtmlNode anchor)
{
    string linkTarget = anchor.GetAttributeValue("href", null);
    string relation = anchor.GetAttributeValue("rel", null);
    string linkTitle = anchor.GetAttributeValue("title", null);
    string linkText = anchor.InnerText;

    // todo: determine if it's an offsite link?
    // determine if it contains a title, has text - if not, image? advise
    Message[] noMessages = new Message[0];
    return new AnchorAnalysis(linkText, linkTitle, linkTarget, relation, noMessages);
}
/// <summary>
/// Reads a thread's HTML node and copies the extracted information into this instance.
/// </summary>
/// <remarks>
/// The title link, the last author link, the author cell and the icon cell (with an image inside)
/// are dereferenced without a null check, so a node missing any of them raises a
/// <see cref="NullReferenceException"/>.
/// </remarks>
/// <param name="threadNode">The thread HTML node.</param>
public void Parse(HtmlNode threadNode)
{
    Func<HtmlNode, string> classOf = node => node.GetAttributeValue("class", string.Empty);

    HtmlNode titleLink = threadNode.Descendants("a")
        .FirstOrDefault(node => classOf(node).Equals("thread_title"));
    this.Name = WebUtility.HtmlDecode(titleLink.InnerText);

    HtmlNode lastAuthorLink = threadNode.Descendants("a")
        .LastOrDefault(node => classOf(node).Equals("author"));
    this.KilledBy = lastAuthorLink.InnerText;

    this.IsSticky = threadNode.Descendants("td").Any(node => classOf(node).Contains("title_sticky"));
    this.IsLocked = classOf(threadNode).Contains("closed");
    this.CanMarkAsUnread = threadNode.Descendants("a").Any(node => classOf(node).Equals("x"));
    this.HasBeenViewed = this.CanMarkAsUnread;

    HtmlNode authorCell = threadNode.Descendants("td")
        .FirstOrDefault(node => classOf(node).Equals("author"));
    this.Author = authorCell.InnerText;

    // The unread counter is only assigned when a "count" link exists.
    HtmlNode countLink = threadNode.Descendants("a")
        .FirstOrDefault(node => classOf(node).Equals("count"));
    if (countLink != null)
    {
        this.RepliesSinceLastOpened = Convert.ToInt32(countLink.InnerText);
    }

    HtmlNode repliesCell = threadNode.Descendants("td")
        .FirstOrDefault(node => classOf(node).Contains("replies"));
    if (repliesCell != null)
    {
        this.ReplyCount = Convert.ToInt32(repliesCell.InnerText);
    }
    else
    {
        this.ReplyCount = 1;
    }

    // Isn't this user configurable?
    this.TotalPages = (this.ReplyCount / 40) + 1;

    string titleHref = titleLink.GetAttributeValue("href", string.Empty);
    this.Location = Constants.BASE_URL + titleHref + Constants.PER_PAGE;

    // The thread id is whatever follows the first '=' in the title link.
    this.ThreadId = Convert.ToInt64(titleHref.Split('=')[1]);

    HtmlNode iconCell = threadNode.Descendants("td")
        .FirstOrDefault(node => classOf(node).Equals("icon"));
    this.ImageIconLocation = iconCell.Descendants("img").FirstOrDefault().GetAttributeValue("src", string.Empty);
}
/// <summary>
/// Tells whether a div is a <c>libCScode</c> block whose label is something other than "c#".
/// </summary>
/// <remarks>
/// The label is the trimmed inner text of the first <c>div</c> inside the first <c>div</c> child;
/// it is compared case-insensitively using the current culture. That nested structure is assumed
/// to exist for every <c>libCScode</c> div.
/// </remarks>
/// <param name="div">The div to test.</param>
/// <returns>true when the class attribute equals "libCScode" and the label is not "c#"; otherwise false.</returns>
private static bool IsUnwantedLanguageDiv(HtmlNode div)
{
    bool isCodeContainer = div.GetAttributeValue("class", "").Equals("libCScode");
    if (!isCodeContainer)
    {
        return false;
    }

    string languageLabel = div.Element("div").Element("div").InnerText.Trim();
    bool isCSharp = languageLabel.Equals("c#", StringComparison.CurrentCultureIgnoreCase);
    return !isCSharp;
}
/// <summary>
/// Builds a market-rate record from a link node, using the text of the node two siblings after it.
/// </summary>
/// <remarks>
/// The node's <c>href</c> selects the rate type; a node without an <c>href</c> is treated as the
/// sh000001 link because that URL is the attribute default. The rate is the first space-separated
/// token of the sibling text, parsed with the invariant culture so the result does not depend on
/// the machine's regional settings.
/// </remarks>
/// <param name="monthNode">The link node identifying the index.</param>
/// <returns>The populated model, or null when the <c>href</c> is neither of the two known index URLs.</returns>
private MarketRateModel BuildMarketRate(HtmlNode monthNode)
{
    const string ShanghaiUrl = "http://finance.ifeng.com/app/hq/stock/sh000001/";
    const string ShenzhenUrl = "http://finance.ifeng.com/app/hq/stock/sz399001/";

    var result = new MarketRateModel();

    string dataCode = monthNode.GetAttributeValue("href", ShanghaiUrl);
    if (dataCode == ShanghaiUrl)
    {
        result.Type = RateType.StockShangzheng;
    }
    else if (dataCode == ShenzhenUrl)
    {
        result.Type = RateType.StockShenzhen;
    }
    else
    {
        return null;
    }

    // Both indexes share the same layout: the quote sits two siblings after the link.
    string quoteText = monthNode.NextSibling.NextSibling.InnerText.Trim();
    string rateText = quoteText.Split(' ')[0];
    result.Rate = decimal.Parse(rateText, System.Globalization.CultureInfo.InvariantCulture);

    // Take one timestamp so CreateTime and RateDay cannot straddle midnight.
    DateTime now = DateTime.Now;
    result.CreateTime = now;
    result.RateDay = now.Date;
    result.Source = SourceType.eIfeng;
    return result;
}
/// <summary>
/// Recursively walks the content and, for every image found, rewrites its <c>src</c> to a newly
/// generated site-relative path and saves the original image under that path.
/// </summary>
/// <remarks>
/// Only nodes without children are inspected, so an <c>img</c> is processed when it is a leaf.
/// The generated path has the form <c>autoimages\{Year}\{Month}\{third of month}\{guid}.{ext}</c>,
/// where the third of the month is 1 for days 1-10, 2 for days 11-20 and 3 otherwise.
/// </remarks>
/// <param name="node">The node to walk.</param>
/// <param name="baseUri">Base address used to resolve the image URL; when empty the URL is used as is.</param>
public static void EachImages(HtmlNode node, string baseUri = "")
{
    // Check whether the node has child tags; if so, only recurse.
    if (node.HasChildNodes)
    {
        foreach (HtmlNode child in node.ChildNodes)
        {
            EachImages(child, baseUri);
        }

        return;
    }

    if (node.Name != "img")
    {
        return;
    }

    // Image
    string url = node.GetAttributeValue("src", "");
    if (url == "")
    {
        return;
    }

    // Resolve the download address before touching the node, so a malformed URL leaves it unchanged.
    Uri uri = baseUri == "" ? new Uri(url) : new Uri(new Uri(baseUri), url);

    // Read the image information and generate the new path.
    string extension = Path.GetExtension(url).TrimStart(new char[] { '.' });
    DateTime now = DateTime.Now;
    int day = now.Day;
    int thirdOfMonth = day <= 10 ? 1 : (day <= 20 ? 2 : 3);

    // Relative path. The extension is passed as an argument rather than concatenated into the
    // format string, so braces in it cannot break the formatting.
    string fullName = string.Format(
        "autoimages\\{0}\\{1}\\{2}\\{3}.{4}",
        now.Year,
        now.Month,
        thirdOfMonth,
        Guid.NewGuid().ToString(),
        extension);

    // Site path
    string urlNew = "/" + fullName.Replace("\\", "/");
    node.SetAttributeValue("src", urlNew);

    // Save locally
    SaveImg(uri.AbsoluteUri, fullName);
}
/// <summary>
/// Replaces a node that references an external script with an inline <c>script</c> element that
/// contains the downloaded script text.
/// </summary>
/// <param name="context">Supplies the address used to resolve relative references and the web client used for the download.</param>
/// <param name="node">The node to inspect.</param>
/// <returns>
/// A new <c>script</c> node holding the downloaded content, or <paramref name="node"/> unchanged
/// when it has no <c>src</c> attribute or the download raises a <see cref="WebException"/>.
/// </returns>
public async Task<HtmlNode> VisitAsync(VisitingContext context, HtmlNode node)
{
    string source = node.GetAttributeValue("src", null);
    if (source == null)
    {
        return node;
    }

    // Take care if the source starts with two slashes: prefix it with a scheme.
    string address = source.StartsWith("//") ? "http:" + source : source;

    Uri scriptUri = new Uri(address, UriKind.RelativeOrAbsolute);
    if (!scriptUri.IsAbsoluteUri)
    {
        scriptUri = new Uri(context.Address, scriptUri);
    }

    // Get the script and insert it inline.
    string script;
    try
    {
        script = await context.WebClient.DownloadAsync(scriptUri);
    }
    catch (WebException)
    {
        return node;
    }

    return HtmlNode.CreateNode("<script>" + script + "</script>");
}
/// <summary>
/// Looks for a <c>brush: ...;</c> declaration in the node's <c>class</c> attribute.
/// </summary>
/// <param name="node">The node whose <c>class</c> attribute is searched.</param>
/// <returns>
/// The whole matched text (including the <c>brush:</c> prefix and the trailing semicolon), or an
/// empty string when there is no match.
/// NOTE(review): the full match is returned, not just the captured group - confirm callers expect that.
/// </returns>
private string GetLanguageFromConfluenceClassAttribute(HtmlNode node)
{
    string classAttribute = node.GetAttributeValue("class", "");

    System.Text.RegularExpressions.Match brushMatch =
        System.Text.RegularExpressions.Regex.Match(classAttribute, @"brush:\s?(:?.*);");

    if (!brushMatch.Success)
    {
        return "";
    }

    return brushMatch.Value;
}
/// <summary>
/// Creates a tree node for a link: the label is the node's HTML-decoded inner HTML and the URL is
/// its HTML-decoded <c>href</c> attribute ("not_found" when the attribute is missing).
/// </summary>
/// <param name="node">The link node; it is also kept as the node's HTML source.</param>
public StineLinkTreeNode(HtmlNode node)
{
    string label = HttpUtility.HtmlDecode(node.InnerHtml);
    string rawHref = node.GetAttributeValue("href", "not_found");

    base.Text = label;
    URL = HttpUtility.HtmlDecode(rawHref);
    HTML_NODE = node;
}
/// <summary>
/// Checks whether the node carries every class required by this filter.
/// </summary>
/// <remarks>
/// The node's <c>class</c> attribute is split on single spaces and compared to the filter's
/// <c>classes</c> case-sensitively. Every filter class has to be present; a result based only on
/// the last filter class would accept nodes that miss earlier ones.
/// </remarks>
/// <param name="node">The node to test.</param>
/// <returns>
/// true when the filter has no classes, or when each filter class appears among the node's classes;
/// false when the node has no <c>class</c> attribute or misses at least one filter class.
/// </returns>
private bool ClassesMatches(HtmlNode node)
{
    if (classes == null || classes.Length < 1)
    {
        return true;
    }

    string classString = node.GetAttributeValue("class", null);
    if (classString == null)
    {
        return false;
    }

    string[] nodeClasses = classString.Split(' ');

    foreach (string filterClass in classes)
    {
        bool found = false;
        foreach (string nodeClass in nodeClasses)
        {
            if (filterClass == nodeClass)
            {
                found = true;
                break;
            }
        }

        // One missing class is enough to reject the node.
        if (!found)
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Looks for a <c>highlight-xxx</c> token in the node's <c>class</c> attribute.
/// </summary>
/// <param name="node">The node whose <c>class</c> attribute is searched.</param>
/// <returns>
/// The whole matched text (including the <c>highlight-</c> prefix), or an empty string when there
/// is no match.
/// NOTE(review): the full match is returned, not just the captured group - confirm callers expect that.
/// </returns>
private string GetLanguageFromHighlightClassAttribute(HtmlNode node)
{
    string classAttribute = node.GetAttributeValue("class", "");

    System.Text.RegularExpressions.Match highlightMatch =
        System.Text.RegularExpressions.Regex.Match(classAttribute, "highlight-([a-zA-Z0-9]+)");

    if (!highlightMatch.Success)
    {
        return "";
    }

    return highlightMatch.Value;
}
/// <summary>
/// Creates the attribute node for a link, choosing the most specific kind that the link's
/// <c>href</c> can be recognised as.
/// </summary>
/// <remarks>
/// The link is tried as a board post link first, then as a YouTube link; if neither service
/// recognises it, a plain link attribute carrying the link text is created.
/// </remarks>
/// <param name="node">The link node whose <c>href</c> attribute is read.</param>
/// <param name="res">The collection handed on to <c>CreateNode</c>.</param>
/// <returns>Whatever <c>CreateNode</c> returns for the chosen attribute.</returns>
private ICollection<PostNodeBase> CreateLinkAttrNode(HtmlNode node, ICollection<PostNodeBase> res)
{
    var href = node.GetAttributeValue("href", null);
    var linkUri = GetLinkText(href);

    // 1. A link to a post on the board.
    var boardLink = Services.GetServiceOrThrow<IMakabaUriService>().TryParsePostLink(linkUri);
    if (boardLink != null)
    {
        var boardAttribute = new PostNodeBoardLinkAttribute();
        boardAttribute.BoardLink = boardLink;
        return CreateNode(res, boardAttribute);
    }

    // 2. A YouTube link. The service is only resolved once the board check has failed.
    var youtubeId = Services.GetServiceOrThrow<IYoutubeIdService>().GetYoutubeIdFromUri(linkUri);
    if (youtubeId != null)
    {
        var youtubeLink = new YoutubeLink();
        youtubeLink.Engine = CoreConstants.Engine.Makaba;
        youtubeLink.YoutubeId = youtubeId;

        var youtubeAttribute = new PostNodeBoardLinkAttribute();
        youtubeAttribute.BoardLink = youtubeLink;
        return CreateNode(res, youtubeAttribute);
    }

    // 3. Anything else is a plain link.
    var plainAttribute = new PostNodeLinkAttribute();
    plainAttribute.LinkUri = linkUri;
    return CreateNode(res, plainAttribute);
}
/// <summary>
/// Searches the Amazon Kindle store for a book and returns the first result when it looks like
/// the requested title.
/// </summary>
/// <remarks>
/// Before searching, the space in front of every single-capital-letter initial in the author name
/// is removed ("J. R. R. Tolkien" becomes "J.R.R. Tolkien") and anything from the first " (" in the
/// title onwards is dropped. The first result is accepted only when its text contains the title,
/// ignoring case.
/// </remarks>
/// <param name="title">The book title.</param>
/// <param name="author">The author name; it is stored unmodified in the returned info.</param>
/// <returns>
/// The book info (link text, author, ASIN, result URL), or null when there is no first result, it
/// does not contain the title, or the expected title link is not found inside it.
/// </returns>
public static BookInfo AmazonSearchBook(string title, string author)
{
    BookInfo result = null;

    // Replace every initial in one pass. Replacing inside a loop that restarts from the original
    // author string would keep only the last replacement when the initials differ.
    string authorTrim = Regex.Replace(author, @" ([A-Z]\.)", "$1");

    int parenthesisIndex = title.IndexOf(" (", StringComparison.Ordinal);
    if (parenthesisIndex >= 0)
    {
        title = title.Substring(0, parenthesisIndex);
    }

    string searchUrl = @"http://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Ddigital-text&field-keywords=" +
        Uri.EscapeDataString(title + " " + authorTrim + " kindle edition");

    HAP.HtmlDocument searchDoc = new HAP.HtmlDocument();
    searchDoc.LoadHtml(HttpDownloader.GetPageHtml(searchUrl));
    HAP.HtmlNode node = searchDoc.DocumentNode.SelectSingleNode("//li[@id='result_0']");

    // At least attempt to verify it might be the same book? Ignore case of title.
    if (node != null && node.InnerText.IndexOf(title, StringComparison.OrdinalIgnoreCase) >= 0)
    {
        string foundASIN = node.GetAttributeValue("data-asin", "");
        node = node.SelectSingleNode(".//div/div/div/div[@class='a-fixed-left-grid-col a-col-right']/div/a");
        if (node != null)
        {
            result = new BookInfo(node.InnerText, author, foundASIN);
            result.amazonUrl = node.GetAttributeValue("href", ""); // Grab the true link for good measure
        }
    }

    return result;
}
/// <summary>
/// Sets <c>IsIgnored</c> on the post according to whether the node's <c>class</c> attribute
/// contains "ignored".
/// </summary>
/// <remarks>
/// The comparison is ordinal and case-insensitive, so it does not depend on the current culture's
/// lower-casing rules.
/// </remarks>
/// <param name="post">The metadata to update.</param>
/// <param name="postNode">The post node whose <c>class</c> attribute is read.</param>
/// <returns>The same <paramref name="post"/> instance, to allow chaining.</returns>
private static ThreadPostMetadata ParseIsIgnored(this ThreadPostMetadata post, HtmlNode postNode)
{
    string classAttribute = postNode.GetAttributeValue("class", string.Empty);
    post.IsIgnored = classAttribute.IndexOf("ignored", System.StringComparison.OrdinalIgnoreCase) >= 0;
    return post;
}
/// <summary>
/// Adds a hyperlink for the node's <c>href</c> to the paragraph obtained for the section.
/// </summary>
/// <param name="section">The document object passed to <c>GetParagraph</c>.</param>
/// <param name="node">The link node; a missing <c>href</c> is treated as an empty string.</param>
/// <returns>
/// The created hyperlink: a bookmark link (without the leading '#') when the <c>href</c> starts
/// with '#', otherwise a web link.
/// </returns>
private static DocumentObject AddHyperlink(DocumentObject section, HtmlNode node)
{
    string target = node.GetAttributeValue("href", "");
    bool pointsToBookmark = target.StartsWith("#");

    var paragraph = GetParagraph(section);
    if (pointsToBookmark)
    {
        return paragraph.AddHyperlink(target.Substring(1), HyperlinkType.Bookmark);
    }

    return paragraph.AddHyperlink(target, HyperlinkType.Web);
}
/// <summary>
/// Extracts a project id from the fifth '/'-separated segment of the node's <c>href</c>.
/// </summary>
/// <param name="node">The link node.</param>
/// <returns>
/// The id parsed from the fifth segment, or 0 when the node has no <c>href</c>, the link has fewer
/// than five segments, or the fifth segment is "users".
/// </returns>
/// <exception cref="FormatException">The fifth segment is present but is not a number.</exception>
private static int ParseProjectId(HtmlNode node)
{
    const int ProjectSegmentIndex = 4;

    string link = node.GetAttributeValue("href", null);
    if (link == null)
    {
        return 0;
    }

    string[] segments = link.Split('/');

    // Index 4 needs at least five segments.
    if (segments.Length > ProjectSegmentIndex && segments[ProjectSegmentIndex] != "users")
    {
        return Convert.ToInt32(segments[ProjectSegmentIndex]);
    }

    return 0;
}
/// <summary>
/// Turns an external link into a numbered in-document reference and returns its original target.
/// </summary>
/// <remarks>
/// The node's content becomes <c>[index]</c>, its <c>href</c> points to <c>#</c> followed by the
/// reference id, and its <c>name</c> is the reference id plus the back-link suffix.
/// </remarks>
/// <param name="externalReferenceNode">The link node; it is modified in place.</param>
/// <param name="index">The reference number.</param>
/// <returns>The reference holding <paramref name="index"/> and the URL the node pointed to before the rewrite.</returns>
private static ExternalReference GetExternalReference(HtmlNode externalReferenceNode, int index)
{
    // Capture the original target first: the href attribute is overwritten below.
    string originalUrl = externalReferenceNode.GetAttributeValue("href", string.Empty);
    string anchorId = GetExternalReferenceId(index);

    externalReferenceNode.InnerHtml = string.Format("[{0}]", index);
    externalReferenceNode.SetAttributeValue("href", "#" + anchorId);
    externalReferenceNode.SetAttributeValue("name", anchorId + BackLinkReferenceIdSuffix);

    var reference = new ExternalReference();
    reference.Index = index;
    reference.Url = originalUrl;
    return reference;
}
/// <summary>
/// Converts an attribute holding a number of seconds since 1970-01-01 00:00:00 into a date.
/// </summary>
/// <remarks>
/// The value is parsed with the invariant culture so that a fractional timestamp such as
/// "1400000000.5" is read the same way regardless of regional settings.
/// </remarks>
/// <param name="node">The node carrying the attribute.</param>
/// <param name="attribute">The name of the attribute to read.</param>
/// <returns>
/// 1970-01-01 plus the attribute's seconds, or <see cref="DateTime.Now"/> when the attribute is
/// missing or empty.
/// </returns>
/// <exception cref="FormatException">The attribute is present but is not a number.</exception>
protected DateTime RetrieveToDateTime( HtmlNode node , string attribute )
{
    var value = node.GetAttributeValue( attribute , string.Empty );
    if ( string.IsNullOrEmpty( value ) )
    {
        return DateTime.Now;
    }

    double seconds = double.Parse( value , System.Globalization.CultureInfo.InvariantCulture );
    var epoch = new DateTime( 1970 , 1 , 1 , 0 , 0 , 0 );
    return epoch.AddSeconds( seconds );
}
/// <summary>
/// Creates an item from its HTML node, reading the <c>data-name</c>, <c>data-id</c> and
/// <c>data-subtitle</c> attributes and the style of an optional <c>div</c> with class "paint".
/// </summary>
/// <remarks>
/// When the first word of the full name is one of the known item types ("unusual x", "unique x",
/// "strange x", ...), it is moved from the name into the type. A name consisting of a single word
/// is left untouched. The paint is the text after the first ':' of the paint div's style, or
/// empty when there is no paint div or its style has no ':'.
/// </remarks>
/// <param name="code">The item's HTML node.</param>
public Item(HtmlNode code)
{
    htmlCode = code;
    name = code.GetAttributeValue("data-name", ""); // full item name from the source (may include type)
    id = code.GetAttributeValue("data-id", "");     // either a number or "stock"

    // Only split when there is something after the first word; otherwise there is no type prefix.
    int firstSpace = name.IndexOf(' ');
    if (firstSpace > 0)
    {
        string firstWord = name.Substring(0, firstSpace);
        if (itemTypes.Contains(firstWord))
        {
            // extract the type from the full item name, then strip it (and the space) from the name
            type = firstWord;
            name = name.Substring(firstSpace + 1);
        }
    }

    level = code.GetAttributeValue("data-subtitle", "");

    HtmlNode paintNode = code.SelectSingleNode("div[@class='paint']");
    paint = "";
    if (paintNode != null)
    {
        string[] styleParts = paintNode.GetAttributeValue("style", "").Split(':');
        if (styleParts.Length > 1)
        {
            paint = styleParts[1];
        }
    }
}
/// <summary>
/// Parses the cell's inline <c>style</c> attribute with the stylesheet parser and uses the result
/// to set the font colour of the worksheet cell.
/// </summary>
/// <remarks>
/// The colour name is taken from the <c>Child.Pseudo</c> of the first simple selector whose element
/// name is "color"; when there is none, an empty name is passed to <c>XLColor.FromName</c>.
/// </remarks>
/// <param name="worksheet">The worksheet containing the target cell.</param>
/// <param name="rowId">Row of the target cell.</param>
/// <param name="colId">Column of the target cell.</param>
/// <param name="cell">The HTML cell whose <c>style</c> attribute is read.</param>
private static void AddExCssStyle(this IXLWorksheet worksheet, int rowId, int colId, HtmlNode cell)
{
    string inlineCss = cell.GetAttributeValue("style", "");
    var stylesheet = new StylesheetParser().Parse(inlineCss);

    var simpleSelectors = stylesheet.RuleSets
        .SelectMany(x => x.Selectors)
        .SelectMany(x => x.SimpleSelectors);
    SimpleSelector colorSelector = simpleSelectors.FirstOrDefault(x => x.ElementName == "color");

    string colorName = "";
    if (colorSelector != null)
    {
        colorName = colorSelector.Child.Pseudo;
    }

    worksheet.Cell(rowId, colId).Style.Font.FontColor = XLColor.FromName(colorName);
}
/// <summary>
/// Tries to build an <see cref="Image"/> from an <c>img</c> node.
/// </summary>
/// <param name="node">The node to inspect; its name is compared case-insensitively.</param>
/// <param name="image">
/// The parsed image on success, otherwise null. A <c>width</c> or <c>height</c> attribute that is
/// missing or not a number yields 0 for that dimension.
/// </param>
/// <returns>true when the node is an <c>img</c> with a non-empty <c>src</c>; otherwise false.</returns>
private bool TryParseImage(HtmlNode node, out Image image)
{
    image = null;

    if (node.Name.ToLowerInvariant() != "img")
    {
        return false;
    }

    string source = node.GetAttributeValue("src", "");
    if (source == "")
    {
        return false;
    }

    // TryParse leaves 0 in the out value when parsing fails, which is the intended fallback.
    int width;
    int height;
    int.TryParse(node.GetAttributeValue("width", ""), out width);
    int.TryParse(node.GetAttributeValue("height", ""), out height);

    image = new Image(source, width, height);
    return true;
}
/// <summary>
/// Builds the absolute URL of a page from a select option and the book it belongs to.
/// </summary>
/// <remarks>
/// The relative URL has the form <c>{category}/{title}/{volume}/{chapter}/{value}.html</c> and is
/// resolved against <c>HostUri</c>.
/// </remarks>
/// <param name="selectOption">The option whose <c>value</c> attribute identifies the page.</param>
/// <param name="mangaBook">The book supplying category name, title, volume and chapter.</param>
/// <returns>The page URL, or null when the option has no <c>value</c> or it is empty.</returns>
protected override Uri GetFullPageUrl(HtmlNode selectOption, MangaBook mangaBook)
{
    string pageValue = selectOption.GetAttributeValue("value", "");

    string relativeUrl = String.Format(
        "{0}/{1}/{2}/{3}/{4}.html",
        mangaBook.UrlCategoryName,
        mangaBook.Title,
        mangaBook.Volume,
        mangaBook.Chapter,
        pageValue);

    if (String.IsNullOrEmpty(pageValue))
    {
        return null;
    }

    return new Uri(HostUri, relativeUrl);
}
/// <summary>
/// Writes an HTML table cell into a worksheet cell: text, data type, column span and font colour.
/// </summary>
/// <remarks>
/// The font colour is applied only when the inline style's <c>color</c> has the form
/// <c>rgb(r, g, b)</c> with three integers; any other notation (a named colour, a hex value,
/// <c>rgba(...)</c>) is skipped instead of raising an exception.
/// </remarks>
/// <param name="worksheet">The worksheet to write to.</param>
/// <param name="rowId">Row of the target cell.</param>
/// <param name="colId">Column of the target cell.</param>
/// <param name="cell">The HTML cell to copy.</param>
/// <returns>The cell's <c>colspan</c> attribute, or 1 when it is missing.</returns>
public static int AddTd(this IXLWorksheet worksheet, int rowId, int colId, HtmlNode cell)
{
    // add text
    worksheet.Cell(rowId, colId).Value = cell.InnerText;

    // add data type
    worksheet.Cell(rowId, colId).DataType = GetDataType(cell.InnerText);

    // colspan merge
    int colspan = cell.GetAttributeValue("colspan", 1);
    if (colspan > 1)
    {
        worksheet.Range(rowId, colId, rowId, colId + (colspan - 1)).Merge();
    }

    // Try style with CssStyleCollection
    var inlineStyle = CssStyleTools.Create();
    inlineStyle.Value = cell.GetAttributeValue("style", "");

    int red;
    int green;
    int blue;
    if (TryParseRgb(inlineStyle["color"], out red, out green, out blue))
    {
        worksheet.Cell(rowId, colId).Style.Font.FontColor = XLColor.FromArgb(red, green, blue);
    }

    return colspan;
}

// Parses "rgb(r, g, b)" into its three integer components; returns false for any other text.
private static bool TryParseRgb(string color, out int red, out int green, out int blue)
{
    red = 0;
    green = 0;
    blue = 0;

    if (string.IsNullOrWhiteSpace(color))
    {
        return false;
    }

    string text = color.Trim();
    const string Prefix = "rgb(";
    if (!text.StartsWith(Prefix, System.StringComparison.OrdinalIgnoreCase)
        || !text.EndsWith(")", System.StringComparison.Ordinal))
    {
        return false;
    }

    // Everything between "rgb(" and the closing parenthesis.
    string[] parts = text.Substring(Prefix.Length, text.Length - Prefix.Length - 1).Split(',');
    return parts.Length == 3
        && int.TryParse(parts[0].Trim(), out red)
        && int.TryParse(parts[1].Trim(), out green)
        && int.TryParse(parts[2].Trim(), out blue);
}
/// <summary>
/// Updates the visual properties with the CSS rules that apply to an element.
/// </summary>
/// <remarks>
/// The selectors passed to <c>UpdateCss</c> are, in order: the element name, then - only when the
/// element has a non-empty <c>class</c> attribute - <c>name.class</c> and <c>.class</c>. The class
/// attribute is used as a whole, without being split into individual classes.
/// </remarks>
/// <param name="properties">The properties to update.</param>
/// <param name="element">The element whose name and class select the rules.</param>
/// <param name="css">The CSS to take the rules from.</param>
/// <returns>The result of <c>UpdateCss</c>.</returns>
public static TextVisualProperties Update(this TextVisualProperties properties, HtmlNode element, CSS css)
{
    var selectors = new List<string>();
    selectors.Add(element.Name);

    string className = element.GetAttributeValue("class", string.Empty);
    bool hasClass = !string.IsNullOrEmpty(className);
    if (hasClass)
    {
        string classSelector = "." + className;
        selectors.Add(element.Name + classSelector);
        selectors.Add(classSelector);
    }

    string[] selectorArray = selectors.ToArray();
    return properties.UpdateCss(css, selectorArray);
}
//static int nLevels = 64;
//static Size testPadding = new Size(32, 32);
//static double hitThreshold = 0;
//static int groupThreshold = 2;
//static double scaleStep = 1.05;
//static bool useMeanShiftGrouping = false;

/// <summary>
/// Loads every .bmp/.jpg sample in a folder together with the shape data from its companion
/// <c>&lt;image name&gt;_data.xml</c> file.
/// </summary>
/// <remarks>
/// Images without a companion XML file are skipped and their paths are written to
/// <c>missing_xml.txt</c> in the current directory, which is overwritten on every call. For each
/// <c>shape</c> element, the text under <c>blocktext/text</c> decides whether the extent is stored
/// as isolator, array block or aperture; shapes with any other text, or without text or extent
/// nodes, are ignored.
/// </remarks>
/// <param name="folder">The folder to scan (not recursive).</param>
/// <returns>One sample per image that has a companion XML file.</returns>
public static List <PSM4TxSample> LoadSamples(string folder)
{
    //Load all samples from all the folders
    List <PSM4TxSample> samples             = new List <PSM4TxSample>();
    List <string>       missing_xml_samples = new List <string>();

    string[] files = Directory.EnumerateFiles(folder)
                     .Where(file => file.EndsWith(".bmp", System.StringComparison.OrdinalIgnoreCase) ||
                                    file.EndsWith(".jpg", System.StringComparison.OrdinalIgnoreCase))
                     .ToArray();

    for (int k = 0; k < files.Length; k++)
    {
        // NOTE(review): the path is still lower-cased because samples and the missing list have
        // always received lower-case names; this only works on case-insensitive file systems.
        string img_file = files[k].ToLowerInvariant();

        // Swap the 4-character extension for the data suffix. Replacing ".bmp"/".jpg" anywhere in
        // the path would also rewrite a directory name that happens to contain it.
        string xml_file = img_file.Substring(0, img_file.Length - 4) + "_data.xml";
        if (!File.Exists(xml_file))
        {
            missing_xml_samples.Add(img_file);
            continue;
        }

        PSM4TxSample sample = new PSM4TxSample(img_file);

        // .// Means descendants, which includes children of children (and so forth).
        // ./ Means direct children.
        // If a XPath starts with a / it becomes relative to the root of the document;
        // to make it relative to your own node start it with ./.
        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
        doc.Load(xml_file);

        // SelectNodes returns null when the document has no shape element.
        HtmlAgilityPack.HtmlNodeCollection shape_nodes = doc.DocumentNode.SelectNodes("//shape");
        if (shape_nodes != null)
        {
            foreach (HtmlAgilityPack.HtmlNode shape_node in shape_nodes)
            {
                HtmlAgilityPack.HtmlNode blocktext_node = shape_node.SelectSingleNode(".//blocktext");
                HtmlAgilityPack.HtmlNode text_node      = blocktext_node == null ? null : blocktext_node.SelectSingleNode("./text");
                HtmlAgilityPack.HtmlNode data_node      = shape_node.SelectSingleNode(".//data");
                HtmlAgilityPack.HtmlNode extent_node    = data_node == null ? null : data_node.SelectSingleNode("./extent");

                // An incomplete shape cannot be classified or measured; skip it.
                if (text_node == null || extent_node == null)
                {
                    continue;
                }

                string s_x = extent_node.GetAttributeValue("X", "");
                string s_y = extent_node.GetAttributeValue("Y", "");
                string s_w = extent_node.GetAttributeValue("Width", "");
                string s_h = extent_node.GetAttributeValue("Height", "");

                string label = text_node.InnerText;
                if (string.Equals(label, "isolator", System.StringComparison.OrdinalIgnoreCase))
                {
                    sample.SetIsolator(s_x, s_y, s_w, s_h);    //(787.18896484375, 1370.0)
                }
                else if (string.Equals(label, "arrayblock", System.StringComparison.OrdinalIgnoreCase))
                {
                    sample.SetArrayBlock(s_x, s_y, s_w, s_h);  //(1052.86828613281, 1201.8359375)
                }
                else if (string.Equals(label, "aperture", System.StringComparison.OrdinalIgnoreCase))
                {
                    sample.SetAperture(s_x, s_y, s_w, s_h);    //(209.156982421875, 1885.03271484375)
                }
            }
        }

        samples.Add(sample);
    }

    using (StreamWriter sw = new StreamWriter("missing_xml.txt"))
    {
        foreach (string missing in missing_xml_samples)
        {
            sw.WriteLine(missing);
        }
    }

    return samples;
}
/// <summary>
/// Creates an instance of <paramref name="targetType"/> and fills every property that defines an
/// XPath attribute with data selected from an HTML document, recursing into properties whose own
/// type defines the HasXPath attribute.
/// </summary>
/// <param name="targetType">Type of the object to fill. It must define the HasXPath attribute and have at least one property that defines an XPath attribute.</param>
/// <param name="htmlDocument">The document to read from. Leave null to read from this node's <c>OwnerDocument</c>.</param>
/// <returns>A new object of type <paramref name="targetType"/> holding the encapsulated data.</returns>
/// <exception cref="ArgumentNullException"><paramref name="targetType"/> is null.</exception>
/// <exception cref="MissingMethodException"><paramref name="targetType"/> cannot be instantiated.</exception>
/// <exception cref="MissingXPathException">The type does not define HasXPath, or defines it without any XPath property.</exception>
/// <exception cref="NodeNotFoundException">An XPath expression fails or selects nothing.</exception>
/// <exception cref="NodeAttributeNotFoundException">The requested value or attribute is not present on a selected node.</exception>
/// <exception cref="FormatException">A selected string cannot be converted to the property's type.</exception>
/// <exception cref="ArgumentException">An enumerable property does not have exactly one generic argument.</exception>
/// <exception cref="Exception">Any other conversion failure (the original error is kept as inner exception), or an enumerable property ends up without items.</exception>
public object GetEncapsulatedData(Type targetType, HtmlDocument htmlDocument = null)
{
    #region SettingPrerequisite

    if (targetType == null)
    {
        // First argument is the parameter name, second the message.
        throw new ArgumentNullException("targetType", "Parameter targetType is null");
    }

    HtmlDocument source = htmlDocument == null ? OwnerDocument : htmlDocument;

    if (targetType.IsInstantiable() == false) // it cannot create an instance because the type lacks a usable constructor
    {
        throw new MissingMethodException("Parameterless Constructor excpected for " + targetType.FullName);
    }

    object targetObject = Activator.CreateInstance(targetType);

    #endregion SettingPrerequisite

    #region targetObject_NOTDefined_XPath

    if (targetType.IsDefinedAttribute(typeof(HasXPathAttribute)) != true) // Object doesn't have the HasXPath attribute
    {
        throw new MissingXPathException("Type T must define HasXPath attribute and include properties with XPath attribute.");
    }

    #endregion targetObject_NOTDefined_XPath

    #region targetObject_Defined_XPath

    // Store the list of properties that define the XPath attribute
    IEnumerable <PropertyInfo> validProperties = targetType.GetPropertiesDefinedXPath();
    if (validProperties.CountOfIEnumerable() == 0) // HasXPath is defined but no property carries an XPath
    {
        throw new MissingXPathException("Type " + targetType.FullName +
                                        " defined HasXPath Attribute but it does not have any property with XPath Attribte.");
    }

    // Fill the properties of targetObject
    foreach (PropertyInfo propertyInfo in validProperties)
    {
        // Get the XPath attribute of the property (written this way for old .NET versions)
        XPathAttribute xPathAttribute = (propertyInfo.GetCustomAttributes(typeof(XPathAttribute), false) as IList)[0] as XPathAttribute;

        #region Property_IsNOT_IEnumerable

        if (propertyInfo.IsIEnumerable() == false) // Property is not an IEnumerable
        {
            HtmlNode htmlNode = null;

            // try to select the node for the given XPath
            try
            {
                htmlNode = source.DocumentNode.SelectSingleNode(xPathAttribute.XPath);
            }
            catch // the XPath could not be evaluated
            {
                throw new NodeNotFoundException("Cannot find node with giving XPath to bind to " +
                                                propertyInfo.PropertyType.FullName + " " + propertyInfo.Name);
            }

            if (htmlNode == null)
            {
                throw new NodeNotFoundException("Cannot find node with givig XPath to bind to " +
                                                propertyInfo.PropertyType.FullName + " " + propertyInfo.Name);
            }

            #region Property_Is_HasXPath_UserDefinedClass

            // Property is a user-defined class that itself defines HasXPath: recurse on the node's inner HTML
            if (propertyInfo.PropertyType.IsDefinedAttribute(typeof(HasXPathAttribute)) == true)
            {
                HtmlDocument innerHtmlDocument = new HtmlDocument();
                innerHtmlDocument.LoadHtml(htmlNode.InnerHtml);

                object o = GetEncapsulatedData(propertyInfo.PropertyType, innerHtmlDocument);
                propertyInfo.SetValue(targetObject, o, null);
            }

            #endregion Property_Is_HasXPath_UserDefinedClass

            #region Property_Is_SimpleType

            // Property is a value type, a .NET class or a user-defined class without XPath properties
            else
            {
                string result = string.Empty;

                if (xPathAttribute.AttributeName == null) // targets the value of the HTML tag
                {
                    result = Tools.GetNodeValueBasedOnXPathReturnType <string>(htmlNode, xPathAttribute);
                }
                else // targets an attribute of the HTML tag
                {
                    result = htmlNode.GetAttributeValue(xPathAttribute.AttributeName, null);
                }

                if (result == null)
                {
                    throw new NodeAttributeNotFoundException("Can not find " + xPathAttribute.AttributeName +
                                                             " Attribute in " + htmlNode.Name + " related to " +
                                                             propertyInfo.PropertyType.FullName + " " + propertyInfo.Name);
                }

                object resultCastedToTargetPropertyType;
                try
                {
                    resultCastedToTargetPropertyType = Convert.ChangeType(result, propertyInfo.PropertyType);
                }
                catch (FormatException ex)
                {
                    throw new FormatException("Can not convert Invalid string to " +
                                              propertyInfo.PropertyType.FullName + " " + propertyInfo.Name, ex);
                }
                catch (Exception ex)
                {
                    // Keep the original error reachable through InnerException.
                    throw new Exception("Unhandled Exception : " + ex.Message, ex);
                }

                propertyInfo.SetValue(targetObject, resultCastedToTargetPropertyType, null);
            }

            #endregion Property_Is_SimpleType
        }

        #endregion Property_IsNOT_IEnumerable

        #region Property_Is_IEnumerable

        else // Property is IEnumerable<T>
        {
            IList <Type> T_Types = propertyInfo.GetGenericTypes() as IList <Type>; // Get the T type

            // Exactly one generic argument is supported (so not something like Dictionary<TKey, TValue>)
            if (T_Types == null || T_Types.Count != 1)
            {
                throw new ArgumentException(propertyInfo.Name + " should have one generic argument.");
            }

            HtmlNodeCollection nodeCollection;

            // try to select the nodes for the given XPath
            try
            {
                nodeCollection = source.DocumentNode.SelectNodes(xPathAttribute.XPath);
            }
            catch
            {
                throw new NodeNotFoundException("Cannot find node with givig XPath to bind to " +
                                                propertyInfo.PropertyType.FullName + " " + propertyInfo.Name);
            }

            if (nodeCollection == null || nodeCollection.Count == 0)
            {
                throw new NodeNotFoundException("Cannot find node with givig XPath to bind to " +
                                                propertyInfo.PropertyType.FullName + " " + propertyInfo.Name);
            }

            IList result = T_Types[0].CreateIListOfType();

            #region Property_Is_IEnumerable<HasXPath-UserDefinedClass>

            if (T_Types[0].IsDefinedAttribute(typeof(HasXPathAttribute)) == true) // T is a user-defined class that defines HasXPath
            {
                foreach (HtmlNode node in nodeCollection)
                {
                    HtmlDocument innerHtmlDocument = new HtmlDocument();
                    innerHtmlDocument.LoadHtml(node.InnerHtml);

                    object o = GetEncapsulatedData(T_Types[0], innerHtmlDocument);
                    result.Add(o);
                }
            }

            #endregion Property_Is_IEnumerable<HasXPath-UserDefinedClass>

            #region Property_Is_IEnumerable<SimpleClass>

            else // T is a value type, a .NET class or a user-defined class without XPath
            {
                if (xPathAttribute.AttributeName == null) // targets the node values
                {
                    try
                    {
                        result = Tools.GetNodesValuesBasedOnXPathReturnType(nodeCollection, xPathAttribute, T_Types[0]);
                    }
                    catch (FormatException ex)
                    {
                        throw new FormatException("Can not convert Invalid string in node collection to " +
                                                  T_Types[0].FullName + " " + propertyInfo.Name, ex);
                    }
                    catch (Exception ex)
                    {
                        throw new Exception("Unhandled Exception : " + ex.Message, ex);
                    }
                }
                else // targets an attribute of each node
                {
                    foreach (HtmlNode node in nodeCollection)
                    {
                        string nodeAttributeValue = node.GetAttributeValue(xPathAttribute.AttributeName, null);
                        if (nodeAttributeValue == null)
                        {
                            throw new NodeAttributeNotFoundException("Can not find " + xPathAttribute.AttributeName +
                                                                     " Attribute in " + node.Name + " related to " +
                                                                     propertyInfo.PropertyType.FullName + " " + propertyInfo.Name);
                        }

                        object resultCastedToTargetPropertyType;
                        try
                        {
                            resultCastedToTargetPropertyType = Convert.ChangeType(nodeAttributeValue, T_Types[0]);
                        }
                        catch (FormatException ex) // the string cannot be cast to the element type
                        {
                            throw new FormatException("Can not convert Invalid string to " +
                                                      T_Types[0].FullName + " " + propertyInfo.Name, ex);
                        }
                        catch (Exception ex)
                        {
                            throw new Exception("Unhandled Exception : " + ex.Message, ex);
                        }

                        result.Add(resultCastedToTargetPropertyType);
                    }
                }
            }

            #endregion Property_Is_IEnumerable<SimpleClass>

            if (result == null || result.Count == 0)
            {
                throw new Exception("Cannot fill " + propertyInfo.PropertyType.FullName + " " +
                                    propertyInfo.Name + " because it is null.");
            }

            propertyInfo.SetValue(targetObject, result, null);
        }

        #endregion Property_Is_IEnumerable
    }

    return targetObject;

    #endregion targetObject_Defined_XPath
}
/*private void downloadGamefrontMap(string mapFolder, Map m, string downloadUrl)
 * {
 *  // Download mod listing page
 *  string tempMainPagePath = Path.Combine(TempDir, mapFolder + "GamefrontMainPage.html");
 *  client = new MapInstallClient(m);
 *  client.DownloadFile(downloadUrl, tempMainPagePath);
 *
 *  // TODO: Parse correct page element for download link
 *  HtmlAgilityPack.HtmlDocument gamefrontMainPage = new HtmlAgilityPack.HtmlDocument();
 *  gamefrontMainPage.LoadHtml(File.ReadAllText(tempMainPagePath));
 *  HtmlAgilityPack.HtmlNode downloadButton = gamefrontMainPage.GetElementbyId("downloadmirrorstoggle");
 *  string downloadPageUrl = Url.Combine(moddbBaseUrl, downloadButton.GetAttributeValue("href", ""));
 * }*/

/// <summary>
/// Downloads a map from ModDB: fetches the listing page, follows its download button to the
/// download page, and starts an asynchronous download of the first link whose text looks like
/// "download name.ext".
/// </summary>
/// <remarks>
/// Both intermediate pages are saved under <c>TempDir</c>. When a matching link is found, the
/// progress bar and status labels are shown and the file is downloaded into <c>TempDir</c>; when
/// none is found, nothing is started.
/// </remarks>
/// <param name="mapFolder">Prefix for the names of the temporary page files.</param>
/// <param name="m">The map being installed; its name is shown while downloading.</param>
/// <param name="downloadUrl">Address of the ModDB listing page.</param>
/// <exception cref="InvalidOperationException">The listing page has no element with id "downloadmirrorstoggle".</exception>
private void DownloadModdbMap(string mapFolder, Map m, string downloadUrl)
{
    // Download mod listing page
    string tempMainPagePath = Path.Combine(TempDir, mapFolder + "ModdbMainPage.html");
    client = new MapInstallClient(m);
    client.DownloadFile(downloadUrl, tempMainPagePath);

    // Scan for download button and retrieve link
    HtmlAgilityPack.HtmlDocument moddbMainPage = new HtmlAgilityPack.HtmlDocument();
    moddbMainPage.LoadHtml(File.ReadAllText(tempMainPagePath));
    HtmlAgilityPack.HtmlNode downloadButton = moddbMainPage.GetElementbyId("downloadmirrorstoggle");
    if (downloadButton == null)
    {
        throw new InvalidOperationException(
            "Could not find the download button (id 'downloadmirrorstoggle') on the ModDB page: " + downloadUrl);
    }

    string downloadPageUrl = Url.Combine(ModdbBaseUrl, downloadButton.GetAttributeValue("href", ""));

    // Download download page
    string tempDownloadPagePath = Path.Combine(TempDir, mapFolder + "DownloadPage.html");
    client.DownloadFile(downloadPageUrl, tempDownloadPagePath);

    // Scan for a tags in download page
    HtmlAgilityPack.HtmlDocument moddbDownloadPage = new HtmlAgilityPack.HtmlDocument();
    moddbDownloadPage.LoadHtml(File.ReadAllText(tempDownloadPagePath));

    // Regex matches text displayed on page for correct download link
    Regex downloadFile = new Regex(@"download (.*)\.(.*)");
    foreach (HtmlNode a in moddbDownloadPage.DocumentNode.Descendants("a"))
    {
        Match match = downloadFile.Match(a.InnerText);
        if (!match.Success)
        {
            continue;
        }

        string filename      = match.Groups[1].ToString();
        string fileExtension = match.Groups[2].ToString();

        // The name comes from page text: keep only the file-name part so it cannot point
        // outside TempDir.
        string safeName = Path.GetFileName(filename + "." + fileExtension);
        if (string.IsNullOrEmpty(safeName))
        {
            continue;
        }

        string destFile        = Path.Combine(TempDir, safeName);
        string realDownloadUrl = Url.Combine(ModdbBaseUrl, a.GetAttributeValue("href", ""));

        client.DownloadFileCompleted   += client_DownloadFileCompleted;
        client.DownloadProgressChanged += client_DownloadProgressChanged;

        // Record the target before the download starts so a completion handler never sees a stale value.
        client.downloadedFile = destFile;

        //MessageBox.Show("File will start downloading.", "Downloading " + m.Name);
        client.DownloadFileAsync(new Uri(realDownloadUrl), destFile);

        // Show progress bar
        progBarMapDownload.Visible = true;

        // Show map name
        lblMapInstalling.Visible = true;
        lblMapInstalling.Text    = m.Name;
        lblInstallStatus.Visible = true;
        lblInstallStatus.Text    = "Downloading map...";

        // Exit this now as we have found and processed our match
        // Any other matches in the HTML are irrelevant
        break;
    }
}
/// <summary>
/// Reads the donor table out of <paramref name="documentText"/>, downloads the
/// detail page behind every data row and passes the extracted fields to
/// <c>StoreUserData</c>.
/// </summary>
/// <remarks>
/// Processing is best-effort: the first failure (missing node, unexpected
/// markup, network error, unparsable age) stops the run. Rows stored before the
/// failure stay stored. The failure is written to the trace output and is not
/// propagated to the caller.
/// </remarks>
/// <param name="documentText">HTML of the listing page that contains the <c>regPage</c> table.</param>
void ExtractUsers(string documentText)
{
    try
    {
        HtmlAgilityPack.HtmlDocument htmlDocument = new HtmlAgilityPack.HtmlDocument();
        htmlDocument.LoadHtml(documentText);

        HtmlAgilityPack.HtmlNode userTable = htmlDocument.DocumentNode.SelectSingleNode("//*[@id='regPage']/div/table");
        if (userTable == null)
        {
            // No listing table in this document: nothing to extract.
            return;
        }

        HtmlAgilityPack.HtmlNodeCollection tableRows = userTable.SelectNodes("./tr");
        if (tableRows == null)
        {
            return;
        }

        // One client serves all detail-page downloads of this run.
        using (WebClient client = new WebClient())
        {
            // XPath positions are 1-based; the loop starts at the second row,
            // so the first <tr> is never treated as a donor.
            for (int k = 2; k <= tableRows.Count; k++)
            {
                HtmlAgilityPack.HtmlNode moreBtn = userTable.SelectSingleNode("./tr[" + k + "]/td[6]/a");
                string linkBtn = moreBtn.GetAttributeValue("onclick", null);

                // The detail-page path is whatever sits between the first and
                // the second 'b' of the onclick text.
                string[] mainLink = linkBtn.Split('b');
                string moreLink = mainLink[1];

                string detailsHtml = client.DownloadString("http://bloodhelpers.com/b" + moreLink);
                HtmlAgilityPack.HtmlDocument userDocument = new HtmlAgilityPack.HtmlDocument();
                userDocument.LoadHtml(detailsHtml);
                HtmlAgilityPack.HtmlNode selectTable = userDocument.DocumentNode.SelectSingleNode("//*[@id='search']/form/table");

                // User name
                string name = selectTable.SelectSingleNode("./tr[1]/td[2]").InnerText;

                // User e-mail
                string email = ExtractImageReference(selectTable.SelectSingleNode("./tr[2]/td[2]"));

                // User blood group
                string bloodGroup = selectTable.SelectSingleNode("./tr[3]/td[2]").InnerText;

                // User gender
                string gender = selectTable.SelectSingleNode("./tr[4]/td[2]").InnerText;

                // User age
                int age = Int32.Parse(selectTable.SelectSingleNode("./tr[5]/td[2]").InnerText.Replace("Years", ""));

                // User city
                string city = selectTable.SelectSingleNode("./tr[6]/td[2]").InnerText;

                // User mobile number
                string mobileNumber = ExtractImageReference(selectTable.SelectSingleNode("./tr[7]/td[2]"));

                // User land line number
                string landLineNum = ExtractImageReference(selectTable.SelectSingleNode("./tr[8]/td[2]"));

                // User last donation date
                string lastDonationDate = selectTable.SelectSingleNode("./tr[9]/td[2]").InnerText;

                // Store data in database
                StoreUserData(name, email, bloodGroup, gender, age, city, mobileNumber, landLineNum, lastDonationDate);
            }
        }
    }
    catch (Exception ex)
    {
        // Still best-effort, but the reason for stopping is no longer lost.
        System.Diagnostics.Trace.TraceError("ExtractUsers stopped: " + ex);
    }
}

// Joins the second and third '='-separated segments of the cell's inner HTML
// with '=' and strips every occurrence of "border".
// NOTE(review): presumably the cell holds an <img> tag whose src carries one
// query parameter (e-mail and phone numbers appear to be rendered as images) -
// confirm against the live markup.
private static string ExtractImageReference(HtmlAgilityPack.HtmlNode cell)
{
    string[] segments = cell.InnerHtml.Split('=');
    string joined = segments[1] + "=" + segments[2];
    return joined.Replace("border", "");
}
// Substrings that mark an href as a probable image link.
private static readonly string[] ImageLinkMarkers = { "jpg", "jpeg", "png", "gif" };

/// <summary>
/// Builds a parsed link from an anchor node: records its href and text, decides
/// whether it looks like a source link, extracts the first number in its text
/// and, for non-deviantart source links, searches the following markup for an
/// image link to propose instead.
/// </summary>
/// <remarks>
/// <c>JValid</c> is true when the href is an absolute http/https URI whose host
/// contains "deviantart". <c>Valid</c> is true when the link text contains
/// "ource" or <c>JValid</c> is true. <c>Source</c> is the first run of digits in
/// the link text, or -1 when there is none or it does not fit an <c>int</c>.
/// </remarks>
/// <param name="node">The anchor element to analyse.</param>
public ParsedLink(HtmlAgilityPack.HtmlNode node)
{
    string href = node.GetAttributeValue("href", "");
    Url.Add(href);
    InnerText = node.InnerText;

    // Url is used as a list here (Add / indexer / Clear), so calling Contains
    // on it only matched an entry that was exactly "deviantart". The intent is
    // a substring test on the link target.
    DeviantArt = href.Contains("deviantart");

    Uri uriResult;
    bool validLinkResult = Uri.TryCreate(Url[0], UriKind.Absolute, out uriResult)
        && (uriResult.Scheme == Uri.UriSchemeHttp || uriResult.Scheme == Uri.UriSchemeHttps);
    JValid = validLinkResult && uriResult.Host.Contains("deviantart");

    // Equivalent to the former "(JValid && x) || x || JValid".
    Valid = InnerText.Contains("ource") || JValid;

    Match match = Regex.Match(InnerText, @"(\d+)");
    if (!int.TryParse(match.Groups[1].Value, out Source))
    {
        Source = -1;
    }

    if (Valid && !JValid && !DeviantArt)
    {
        // Walk the siblings that follow the anchor's parent; a detached node
        // (no parent) simply has nothing to scan.
        var parent = node.ParentNode;
        var sibling = parent != null ? parent.NextSibling : null;

        while (sibling != null)
        {
            if (sibling.Name == "a")
            {
                break;
            }

            // Check to see if we've moved onto another source's link
            if (sibling.Name == "b" && sibling.Descendants("a").Any(l => l.InnerHtml.Contains("ource")))
            {
                break;
            }

            if (sibling.Name == "div")
            {
                var possibleImageLink = sibling.Descendants("a").FirstOrDefault(l => LooksLikeImageLink(l));
                if (possibleImageLink != null)
                {
                    if (!AlternativeProposed)
                    {
                        // Keep the original target once, then replace the
                        // list contents with the proposed alternatives.
                        OriginalUrl = Url[0];
                        Url.Clear();
                    }

                    Url.Add(possibleImageLink.GetAttributeValue("href", ""));
                    AlternativeProposed = true;
                    JValid = true;
                }
            }

            sibling = sibling.NextSibling;
        }
    }
}

// True when the link's href contains one of the image markers, ignoring case.
// An ordinal comparison is used so the result does not depend on the current
// culture (ToLower() maps 'I' differently under e.g. Turkish cultures).
private static bool LooksLikeImageLink(HtmlAgilityPack.HtmlNode link)
{
    string target = link.GetAttributeValue("href", "");
    return ImageLinkMarkers.Any(marker => target.IndexOf(marker, StringComparison.OrdinalIgnoreCase) >= 0);
}
/// <summary>
/// Creates an instance of <typeparamref name="T"/> and fills every property
/// reported by <c>Tools.GetPropertiesDefinedXPath</c> with data selected from
/// the HTML document by that property's <c>XPathAttribute</c>.
/// </summary>
/// <typeparam name="T">
/// Type to create. It must be marked with <c>HasXPathAttribute</c>; otherwise a
/// <see cref="NotImplementedException"/> is thrown.
/// </typeparam>
/// <param name="htmlDocument">
/// Document to read from. When null, <c>OwnerDocument</c> is used.
/// </param>
/// <returns>The populated instance of <typeparamref name="T"/>.</returns>
/// <exception cref="NotImplementedException">
/// <typeparamref name="T"/> is not marked with <c>HasXPathAttribute</c>, or an
/// enumerable property does not report exactly one generic type argument.
/// </exception>
public T GetEncapsulatedData<T>(HtmlDocument htmlDocument = null)
{
    HtmlDocument source = htmlDocument == null ? OwnerDocument : htmlDocument;

    // The instance is created before the attribute check, as it always was.
    T targetObject = Activator.CreateInstance<T>();

    if (!Tools.IsDefinedAttr(typeof(T), typeof(HasXPathAttribute)))
    {
        // Types without the HasXPath marker are not supported.
        throw new NotImplementedException();
    }

    foreach (PropertyInfo property in Tools.GetPropertiesDefinedXPath(typeof(T)))
    {
        object[] declared = property.GetCustomAttributes(typeof(XPathAttribute), false);
        XPathAttribute binding = declared[0] as XPathAttribute;

        if (Tools.IsIEnumerable(property))
        {
            // ----- Collection property -----
            IList<Type> elementTypes = Tools.GetGenericTypes(property) as IList<Type>;
            if (elementTypes == null || elementTypes.Count != 1)
            {
                // Zero or several type arguments (e.g. a dictionary) are not handled.
                throw new NotImplementedException();
            }

            Type elementType = elementTypes[0];

            // NOTE(review): the selection result is enumerated without a null
            // check - confirm what SelectNodes yields when nothing matches.
            HtmlNodeCollection matches = source.DocumentNode.SelectNodes(binding.XPath);
            IList items = Tools.CreateIListOfType(elementType);

            if (Tools.IsDefinedAttr(elementType, typeof(HasXPathAttribute)))
            {
                // Elements are themselves HasXPath types: build each one recursively.
                foreach (HtmlNode match in matches)
                {
                    items.Add(BuildNestedObject(match, elementType));
                }
            }
            else if (binding.AttributeName == null)
            {
                // Elements are taken from the node values.
                items = Tools.GetNodesValuesBasedOnXPathReturnType(matches, binding, elementType);
            }
            else
            {
                // Elements are taken from one attribute of every matched node.
                foreach (HtmlNode match in matches)
                {
                    string raw = match.GetAttributeValue(binding.AttributeName, "Html Tag Attribute Not Specified");
                    items.Add(Convert.ChangeType(raw, elementType));
                }
            }

            property.SetValue(targetObject, items, null);
        }
        else
        {
            // ----- Single-valued property -----
            // NOTE(review): the selected node is dereferenced without a null
            // check - confirm what SelectSingleNode yields when nothing matches.
            HtmlNode matched = source.DocumentNode.SelectSingleNode(binding.XPath);
            object value;

            if (Tools.IsDefinedAttr(property.PropertyType, typeof(HasXPathAttribute)))
            {
                // The property type is itself a HasXPath type: build it recursively.
                value = BuildNestedObject(matched, property.PropertyType);
            }
            else
            {
                string text = string.Empty;
                if (binding.AttributeName == null)
                {
                    // Value of the node itself.
                    text = Tools.GetNodeValueBasedOnXPathReturnType<string>(matched, binding);
                }
                else
                {
                    // Value of one attribute of the node.
                    text = matched.GetAttributeValue(binding.AttributeName, "Html Tag Attribute Not Specified");
                }

                value = Convert.ChangeType(text, property.PropertyType);
            }

            property.SetValue(targetObject, value, null);
        }
    }

    return targetObject;
}

// Loads the inner HTML of the given node into a fresh document and invokes
// GetEncapsulatedData<targetType> on that document's root via reflection.
private static object BuildNestedObject(HtmlNode node, Type targetType)
{
    HtmlDocument fragment = new HtmlDocument();
    fragment.LoadHtml(node.InnerHtml);

    MethodInfo encapsulate = Tools.GetMethodByItsName(typeof(HtmlNode), "GetEncapsulatedData").MakeGenericMethod(targetType);
    return encapsulate.Invoke(fragment.DocumentNode, new object[] { fragment });
}