GetAttributeValue() public method

Helper method to get the value of an attribute of this node. If the attribute is not found, the default value will be returned.
public GetAttributeValue ( string name, bool def ) : bool
name string The name of the attribute to get. May not be null.
def bool The default value to return if not found.
return bool
Beispiel #1
        /// <summary>
        /// Writes <paramref name="value"/> into a form control node.
        /// </summary>
        /// <param name="n">The node to update; only nodes named "select" or "input" are handled.</param>
        /// <param name="value">The value to select or assign.</param>
        /// <returns>true when the node was a "select" or "input"; otherwise false.</returns>
        // NOTE(review): this excerpt has lost its brace lines (and probably "break;"/"default:" lines),
        // so the exact nesting of the switch below cannot be confirmed from here.
        public bool SetValue(HtmlNode n, string value)
            if (n is HtmlNode && n.Name == "select")
                // Mark the option whose "value" equals the requested value; every other option gets
                // an empty "selected" attribute.
                // NOTE(review): if this is HtmlAgilityPack, SelectNodes returns null when nothing
                // matches, which would throw here for a select without options — confirm.
                foreach (HtmlNode o in n.SelectNodes("option"))
                    o.SetAttributeValue("selected", o.GetAttributeValue("value", "").Equals(value) ? "selected" : "");
                return true;

            if (n is HtmlNode && n.Name == "input")
                switch (n.GetAttributeValue("type", ""))
                    case "radio":
                        // "checked" is computed against the node's current "value" before it is overwritten.
                        n.SetAttributeValue("checked", n.GetAttributeValue("value", "").Equals(value) ? "checked" : "");
                        n.SetAttributeValue("value", value);
                n.SetAttributeValue("value", value);
                return true;

            return false;
Beispiel #2
		/// <summary>
		/// Replaces a stylesheet link node with an inline style node holding the downloaded content.
		/// </summary>
		/// <param name="context">Supplies the page address (for relative hrefs) and the web client.</param>
		/// <param name="node">The node being visited.</param>
		/// <returns>
		/// The original node when it is not a stylesheet link, has no href, or the download raises a
		/// WebException; otherwise a new node created from the downloaded content wrapped in a style tag.
		/// </returns>
		public async Task<HtmlNode> VisitAsync(VisitingContext context, HtmlNode node)
			// We're only interested in stylesheets.
			if (node.GetAttributeValue("rel", null) != "stylesheet")
				return node;
			var href = node.GetAttributeValue("href", null);
			if (href == null)
				return node;
			// Relative hrefs are resolved against the address of the page being visited.
			var hrefUri = new Uri(href, UriKind.RelativeOrAbsolute);
			if (!hrefUri.IsAbsoluteUri)
				hrefUri = new Uri(context.Address, hrefUri);

			// Get the stylesheet and insert it inline.
			// NOTE(review): the "try" line that pairs with the catch below is missing from this excerpt.
			var content = default(string);
				content = await context.WebClient.DownloadAsync(hrefUri);
			catch (WebException)
				return node;
			content = "<style>" + content + "</style>";
			return HtmlNode.CreateNode(content);
Beispiel #3
        /// <summary>
        /// Creates a form from a "form" HTML node, reading its action, method, enctype and
        /// accept-charset attributes.
        /// </summary>
        /// <param name="formNode">The "form" node (name compared case-insensitively). Must not be null.</param>
        /// <param name="session">The session stored on the form. Must not be null.</param>
        /// <param name="baseUrl">Base URL against which the "action" attribute is resolved. Must not be null.</param>
        /// <param name="charset">The character set used in the previous response (from which the form originates).</param>
        /// <exception cref="ArgumentException">Thrown when <paramref name="formNode"/> is not a "form" node.</exception>
        public Form(HtmlNode formNode, ISession session, Uri baseUrl, string charset)
            Condition.Requires(formNode, "formNode").IsNotNull();
              Condition.Requires(session, "session").IsNotNull();
              Condition.Requires(baseUrl, "baseUrl").IsNotNull();

              if (!formNode.Name.Equals("form", StringComparison.OrdinalIgnoreCase))
            throw new ArgumentException(string.Format("Cannot create HTML form from '{0}' node.", formNode.Name));

              // A missing "action" resolves to the base URL itself; a missing "method" defaults to "get".
              Action = new Uri(baseUrl, formNode.GetAttributeValue("action", ""));
              Method = formNode.GetAttributeValue("method", "get");
              Session = session;
              BaseUrl = baseUrl;
              ResponseCharset = charset;

              // Without an explicit "enctype" the form uses MediaType.ApplicationFormUrlEncoded.
              string enctype = formNode.GetAttributeValue("enctype", null);
              EncodingType = (enctype != null ? new MediaType(enctype) : MediaType.ApplicationFormUrlEncoded);

              // Stays null when the form declares no "accept-charset".
              AcceptCharset = formNode.GetAttributeValue("accept-charset", null);

              Values = new Hashtable();
              SubmitElements = new List<SubmitElement>();

Beispiel #4
 /// <summary>
 /// Reads the page number from the node's "value" attribute.
 /// </summary>
 /// <param name="threadNode">Node carrying the "value" attribute; may be null.</param>
 /// <returns>The converted attribute value, or 1 when the node is null or the attribute is missing/empty.</returns>
 private static int GetPageNumber(HtmlNode threadNode)
 {
     if (threadNode == null)
     {
         return 1;
     }

     string pageValue = threadNode.GetAttributeValue("value", string.Empty);
     return string.IsNullOrEmpty(pageValue) ? 1 : Convert.ToInt32(pageValue);
 }
        /// <summary>
        /// Renders an image node as a Markdown image: <c>![alt](src "title")</c>.
        /// The quoted title part is emitted only when the extracted title is non-empty.
        /// </summary>
        /// <param name="node">The image node providing the "alt" and "src" attributes.</param>
        /// <returns>The Markdown image markup.</returns>
        public override string Convert(HtmlNode node)
        {
            string altText = node.GetAttributeValue("alt", string.Empty);
            string source = node.GetAttributeValue("src", string.Empty);
            string rawTitle = this.ExtractTitle(node);

            string titleSuffix = "";
            if (rawTitle.Length > 0)
            {
                titleSuffix = " \"" + rawTitle + "\"";
            }

            return "![" + altText + "](" + source + titleSuffix + ")";
        }
Beispiel #6
 /// <summary>
 /// Builds a <c>Note</c> from a link node: the grade is the trimmed inner text, the name comes
 /// from "title", the id from the <c>jegyId=</c> number in "href", and the type from the last
 /// character of the "class" attribute.
 /// </summary>
 /// <param name="Node">The node describing the note.</param>
 /// <returns>The parsed note.</returns>
 public static Note Parse(HtmlNode Node)
 {
     string gradeText = Node.InnerText.Trim();
     string noteName = Node.GetAttributeValue("title", "Névtelen jegy");
     string link = Node.GetAttributeValue("href", "");

     // int.Parse throws when the href carries no jegyId number (the group value is then empty).
     Match idMatch = Regex.Match(link, "jegyId=(?<id>[0-9]+)", RegexOptions.IgnoreCase);
     int noteId = int.Parse(idMatch.Groups["id"].Value);

     // The trailing digit of the class name ("jegy2" by default) is cast to the note type.
     string cssClass = Node.GetAttributeValue("class", "jegy2");
     string typeDigit = cssClass.Substring(cssClass.Length - 1, 1);
     NoteType noteType = (NoteType)int.Parse(typeDigit);

     return new Note(noteId, gradeText, noteName, noteType);
 }
Beispiel #7
 /// <summary>
 /// Returns the attribute that holds the target of a node for the filter's node type:
 /// "src" for images, "href" for links, and an empty string for anything else.
 /// </summary>
 /// <param name="node">The node to read the attribute from.</param>
 /// <param name="filter">The filter whose node type selects the attribute.</param>
 /// <returns>The attribute value, or "" when it is absent or the node type is not handled.</returns>
 public static string GetTargetFromFilter(HtmlNode node, IFilter filter)
 {
     var nodeType = filter.GetNodeType();

     if (nodeType == NodeType.Image)
     {
         return node.GetAttributeValue("src", "");
     }

     if (nodeType == NodeType.Link)
     {
         return node.GetAttributeValue("href", "");
     }

     return "";
 }
Beispiel #8
        /// <summary>
        ///     Parses a forum post in a thread and fills User, PostDate, PostIndex, PostId,
        ///     PostHtml, HasSeen and User.IsCurrentUserPost from it.
        /// </summary>
        /// <param name="postNode">The post HTML node.</param>
        // NOTE(review): several statements in this excerpt are truncated (an assignment with no
        // right-hand side, ".FirstOrDefault" with no receiver) and an "else" line appears to be
        // missing before "PostId = 0;". Restore them from the original source before relying on this.
        public void Parse(HtmlNode postNode)
            User = ForumUserEntity.FromPost(postNode);

            // The post date is the inner text of the node whose class is exactly "postdate";
            // an empty string is used when no such node exists.
            HtmlNode postDateNode =
                    .FirstOrDefault(node => node.GetAttributeValue("class", string.Empty).Equals("postdate"));
            string postDateString = postDateNode == null ? string.Empty : postDateNode.InnerText;
            if (postDateString != null)
                PostDate = postDateString.WithoutNewLines().Trim();

            PostIndex = ParseInt(postNode.GetAttributeValue("data-idx", string.Empty));

            // The numeric post id is taken from the "id" attribute after stripping "post" and,
            // when present, "#". Int64.Parse throws if anything non-numeric remains.
            var postId = postNode.GetAttributeValue("id", string.Empty);
            if (!string.IsNullOrEmpty(postId) && postId.Contains("#"))
                PostId =
                    Int64.Parse(postNode.GetAttributeValue("id", string.Empty)
                        .Replace("post", string.Empty)
                        .Replace("#", string.Empty));
            else if (!string.IsNullOrEmpty(postId) && postId.Contains("post"))
                PostId =
                    Int64.Parse(postNode.GetAttributeValue("id", string.Empty)
                        .Replace("post", string.Empty));
                PostId = 0;
            // NOTE(review): FirstOrDefault yields null when no "postbody" cell exists, so the
            // InnerHtml access below would throw a NullReferenceException — confirm this cannot happen.
            var postBodyNode = postNode.Descendants("td")
                .FirstOrDefault(node => node.GetAttributeValue("class", string.Empty).Equals("postbody"));
            PostHtml = postBodyNode.InnerHtml;
            HtmlNode profileLinksNode =
                        .FirstOrDefault(node => node.GetAttributeValue("class", string.Empty).Equals("postlinks"));
            HtmlNode postRow =

            // A post counts as seen when the row's class attribute contains "seen".
            if (postRow != null)
                HasSeen = postRow.GetAttributeValue("class", string.Empty).Contains("seen");

            // The current user owns the post when an element with alt="Edit" is found.
            User.IsCurrentUserPost =
                    .FirstOrDefault(node => node.GetAttributeValue("alt", string.Empty).Equals("Edit")) != null;
		/// <summary>
		/// Collects the text, title, href and rel of an anchor into an <c>AnchorAnalysis</c>
		/// with an empty message list. Missing attributes are passed on as null.
		/// </summary>
		/// <param name="anchor">The anchor node to analyse.</param>
		/// <returns>The analysis of the anchor.</returns>
		static AnchorAnalysis AnalyseAnchor(HtmlNode anchor)
		{
			// todo: determine whether it is an offsite link?
			// todo: determine whether it has a title and text - if not, is it an image? advise

			return new AnchorAnalysis(
				anchor.InnerText,
				anchor.GetAttributeValue("title", null),
				anchor.GetAttributeValue("href", null),
				anchor.GetAttributeValue("rel", null),
				new Message[0]);
		}
Beispiel #10
        /// <summary>
        /// Parses a thread HTML node to extract the information from it.
        /// </summary>
        /// <param name="threadNode">The thread HTML node.</param>
        // NOTE(review): most lookups below dereference the result of FirstOrDefault/LastOrDefault
        // directly; each throws a NullReferenceException when the expected element is absent.
        public void Parse(HtmlNode threadNode)
            // The thread title is the HTML-decoded text of the link with class "thread_title".
            this.Name = WebUtility.HtmlDecode(threadNode.Descendants("a").FirstOrDefault(node => node.GetAttributeValue("class", string.Empty).Equals("thread_title")).InnerText);

            // The last link with class "author" supplies KilledBy; the first cell with that class supplies Author.
            this.KilledBy = threadNode.Descendants("a").LastOrDefault(node => node.GetAttributeValue("class", string.Empty).Equals("author")).InnerText;
            this.IsSticky = threadNode.Descendants("td").Any(node => node.GetAttributeValue("class", string.Empty).Contains("title_sticky"));
            this.IsLocked = threadNode.GetAttributeValue("class", string.Empty).Contains("closed");
            // A link with class "x" is what sets CanMarkAsUnread, and HasBeenViewed mirrors it.
            this.CanMarkAsUnread = threadNode.Descendants("a").Any(node => node.GetAttributeValue("class", string.Empty).Equals("x"));
            this.HasBeenViewed = this.CanMarkAsUnread;
            this.Author = threadNode.Descendants("td").FirstOrDefault(node => node.GetAttributeValue("class", string.Empty).Equals("author")).InnerText;
            if (threadNode.Descendants("a").Any(node => node.GetAttributeValue("class", string.Empty).Equals("count")))
                this.RepliesSinceLastOpened = Convert.ToInt32(threadNode.Descendants("a").FirstOrDefault(node => node.GetAttributeValue("class", string.Empty).Equals("count")).InnerText);
            // NOTE(review): an "else" line appears to be missing before "ReplyCount = 1" in this
            // excerpt; presumably 1 is the fallback when no "replies" cell exists — confirm.
            if (threadNode.Descendants("td").Any(node => node.GetAttributeValue("class", string.Empty).Contains("replies")))
                this.ReplyCount = Convert.ToInt32(threadNode.Descendants("td").FirstOrDefault(node => node.GetAttributeValue("class", string.Empty).Contains("replies")).InnerText);
                this.ReplyCount = 1;

            // Isn't this user configurable?
            // The page count is derived from a hard-coded 40 replies per page.
            this.TotalPages = (this.ReplyCount / 40) + 1;
            this.Location = Constants.BASE_URL + threadNode.Descendants("a").FirstOrDefault(node => node.GetAttributeValue("class", string.Empty).Equals("thread_title")).GetAttributeValue("href",string.Empty) + Constants.PER_PAGE;
            // The thread id is whatever follows the first '=' in the title link's href;
            // Split('=')[1] throws when the href contains no '='.
            this.ThreadId = Convert.ToInt64(threadNode.Descendants("a").FirstOrDefault(node => node.GetAttributeValue("class", string.Empty).Equals("thread_title")).GetAttributeValue("href",string.Empty).Split('=')[1]);
            this.ImageIconLocation = threadNode.Descendants("td").FirstOrDefault(node => node.GetAttributeValue("class", string.Empty).Equals("icon")).Descendants("img").FirstOrDefault().GetAttributeValue("src", string.Empty);
 // Checks whether the div's "class" attribute equals "libCScode" (a missing attribute is
 // treated as an empty string).
 // NOTE(review): the return expression has no terminating ';' in this excerpt, so further
 // conditions may have been lost — confirm against the original source.
 private static bool IsUnwantedLanguageDiv(HtmlNode div)
     return div.GetAttributeValue("class", "").Equals("libCScode")
Beispiel #12
        // Builds a market rate record from a link node: the link's "href" selects the rate type and
        // the rate value is the first space-separated token of the text two siblings after the node.
        // NOTE(review): both branches compare the href against "" in this excerpt, which makes the
        // second branch unreachable; the original literals were most likely stripped during
        // extraction. The "return null;" presumably belongs to a missing "else" branch — confirm.
        private MarketRateModel BuildMarketRate(HtmlNode monthNode)
            var result = new MarketRateModel();
            string value = "";
            var dataCode = monthNode.GetAttributeValue("href", "");
            if ("" == dataCode)
                value = monthNode.NextSibling.NextSibling.InnerText.Trim();
                value = value.Split(' ')[0];
                result.Type = RateType.StockShangzheng;
                // decimal.Parse uses the current culture and throws on non-numeric text.
                result.Rate = decimal.Parse(value);
            else if ("" == dataCode)
                value = monthNode.NextSibling.NextSibling.InnerText.Trim();
                value = value.Split(' ')[0];
                result.Type = RateType.StockShenzhen;
                result.Rate = decimal.Parse(value);
                return null;

            // Creation time and rate day are stamped with the local clock.
            result.CreateTime = DateTime.Now;
            result.RateDay = DateTime.Now.Date;
            result.Source = SourceType.eIfeng;

            return result;
Beispiel #13
 /// <summary>
 /// Recursively walks the content below <paramref name="node"/>. For every childless
 /// <c>img</c> element with a non-empty <c>src</c>, the attribute is rewritten to a freshly
 /// generated local path and the original image is passed to <c>SaveImg</c>.
 /// </summary>
 /// <param name="node">Root of the subtree to process.</param>
 /// <param name="baseUri">
 /// Base address used to resolve the original <c>src</c>. When empty, the <c>src</c> itself is
 /// handed to <c>new Uri(string)</c>, which throws for relative addresses.
 /// </param>
 public static void EachImages(HtmlNode node, string baseUri = "")
 {
     if (node.HasChildNodes)
     {
         foreach (HtmlNode child in node.ChildNodes)
         {
             EachImages(child, baseUri);
         }
     }
     else if (node.Name == "img")
     {
         string url = node.GetAttributeValue("src", "");
         if (url == "")
         {
             return;
         }

         string extension = Path.GetExtension(url).TrimStart(new char[] { '.' });

         // Read the clock once so year, month and day always describe the same instant.
         DateTime now = DateTime.Now;
         int day = now.Day;
         int monthPart = day <= 10 ? 1 : (day <= 20 ? 2 : 3);

         // Layout: autoimages\<year>\<month>\<third of month>\<guid>.<extension>.
         // The extension is passed as an argument rather than concatenated into the format
         // string, so braces inside it cannot be misread as format items.
         string fullName = string.Format(
             "autoimages\\{0}\\{1}\\{2}\\{3}.{4}",
             now.Year,
             now.Month,
             monthPart,
             Guid.NewGuid().ToString(),
             extension);

         string urlNew = "/" + fullName.Replace("\\", "/");
         node.SetAttributeValue("src", urlNew);

         Uri uri = baseUri == "" ? new Uri(url) : new Uri(new Uri(baseUri), url);
         SaveImg(uri.AbsoluteUri, fullName);
     }
 }
Beispiel #14
		/// <summary>
		/// Replaces a node that references an external script with an inline script node holding
		/// the downloaded content.
		/// </summary>
		/// <param name="context">Supplies the page address (for relative sources) and the web client.</param>
		/// <param name="node">The node being visited.</param>
		/// <returns>
		/// The original node when it has no "src" or the download raises a WebException; otherwise a
		/// new node created from the downloaded content wrapped in a script tag.
		/// </returns>
		public async Task<HtmlNode> VisitAsync(VisitingContext context, HtmlNode node)
			var src = node.GetAttributeValue("src", null);
			if (src == null)
				return node;

			// Scheme-relative sources ("//host/path") are always fetched over plain http.
			if (src.StartsWith("//"))
				src = "http:" + src;

			// Relative sources are resolved against the address of the page being visited.
			var srcUri = new Uri(src, UriKind.RelativeOrAbsolute);
			if (!srcUri.IsAbsoluteUri)
				srcUri = new Uri(context.Address, srcUri);

			// Get the script and insert it inline.
			// NOTE(review): the "try" line that pairs with the catch below is missing from this excerpt.
			var content = default(string);
				content = await context.WebClient.DownloadAsync(srcUri);
			catch (WebException)
				return node;
			content = "<script>" + content + "</script>";
			return HtmlNode.CreateNode(content);
Beispiel #15
 /// <summary>
 /// Looks for a Confluence-style <c>brush: ...;</c> declaration in the node's "class" attribute.
 /// </summary>
 /// <param name="node">The node whose "class" attribute is inspected.</param>
 /// <returns>The entire matched text (including the "brush:" prefix), or "" when nothing matches.</returns>
 // NOTE(review): "(:?" in the pattern looks like a typo for "(?:"; left untouched because the
 // method returns the whole match either way.
 private string GetLanguageFromConfluenceClassAttribute(HtmlNode node)
 {
     string classValue = node.GetAttributeValue("class", "");
     System.Text.RegularExpressions.Match brushMatch =
         System.Text.RegularExpressions.Regex.Match(classValue, @"brush:\s?(:?.*);");

     if (!brushMatch.Success)
     {
         return "";
     }

     return brushMatch.Value;
 }
 /// <summary>
 /// Creates a tree node for a link: the text is the HTML-decoded inner HTML, the URL is the
 /// HTML-decoded "href" ("not_found" when the attribute is absent), and the source node is kept.
 /// </summary>
 /// <param name="node">The link node this tree node represents.</param>
 public StineLinkTreeNode(HtmlNode node)
 {
     base.Text = HttpUtility.HtmlDecode(node.InnerHtml);

     string rawHref = node.GetAttributeValue("href", "not_found");
     URL = HttpUtility.HtmlDecode(rawHref);

     HTML_NODE = node;
 }
Beispiel #17
        /// <summary>
        /// Checks whether the node carries every class required by the <c>classes</c> filter.
        /// </summary>
        /// <param name="node">The node whose "class" attribute is inspected.</param>
        /// <returns>
        /// true when no filter classes are configured, or when each filter class appears among the
        /// node's space-separated classes; false when the node has no "class" attribute or at
        /// least one filter class is missing.
        /// </returns>
        private bool ClassesMatches(HtmlNode node)
        {
            if (classes == null || classes.Length < 1)
            {
                return true;
            }

            string classString = node.GetAttributeValue("class", null);
            if (classString == null)
            {
                return false;
            }

            string[] nodeClasses = classString.Split(' ');

            // Every filter class has to be present. The previous loop overwrote its result on each
            // iteration, so only the last filter class decided the outcome.
            foreach (string filterClass in classes)
            {
                if (System.Array.IndexOf(nodeClasses, filterClass) < 0)
                {
                    return false;
                }
            }

            return true;
        }
Beispiel #18
 /// <summary>
 /// Looks for a <c>highlight-&lt;language&gt;</c> token in the node's "class" attribute.
 /// </summary>
 /// <param name="node">The node whose "class" attribute is inspected.</param>
 /// <returns>The entire matched token (including the "highlight-" prefix), or "" when nothing matches.</returns>
 private string GetLanguageFromHighlightClassAttribute(HtmlNode node)
 {
     string classValue = node.GetAttributeValue("class", "");
     System.Text.RegularExpressions.Match highlightMatch =
         System.Text.RegularExpressions.Regex.Match(classValue, "highlight-([a-zA-Z0-9]+)");

     if (!highlightMatch.Success)
     {
         return "";
     }

     return highlightMatch.Value;
 }
 // Turns a link node into a post attribute node. The href text is tried, in order, as a board
 // post link, then as a YouTube link, and finally kept as a plain link URI.
 // NOTE(review): the object initializers below have lost their braces and terminators in this
 // excerpt; the nesting shown by the indentation is the only structural hint left.
 private ICollection<PostNodeBase> CreateLinkAttrNode(HtmlNode node, ICollection<PostNodeBase> res)
     // GetLinkText receives null when the node has no "href".
     var linkUri = GetLinkText(node.GetAttributeValue("href", null));
     var uriService = Services.GetServiceOrThrow<IMakabaUriService>();
     var detectedLink = uriService.TryParsePostLink(linkUri);
     if (detectedLink != null)
         return CreateNode(res, new PostNodeBoardLinkAttribute()
             BoardLink = detectedLink
     var youtubeService = Services.GetServiceOrThrow<IYoutubeIdService>();
     var youtubeId = youtubeService.GetYoutubeIdFromUri(linkUri);
     if (youtubeId != null)
         return CreateNode(res, new PostNodeBoardLinkAttribute()
             BoardLink = new YoutubeLink()
                 Engine = CoreConstants.Engine.Makaba,
                 YoutubeId = youtubeId
     // Neither a board link nor a YouTube link: keep the raw link.
     return CreateNode(res, new PostNodeLinkAttribute()
         LinkUri = linkUri
Beispiel #20
        // Searches for a Kindle edition by title and author and reads the ASIN and link of the
        // first search result.
        // NOTE(review): this excerpt is incomplete — the search URL literal is empty, no statement
        // loads a page into searchDoc, an "else" line appears to be missing before
        // "authorTrim = author;", and the final "return result;" is not visible.
        public static BookInfo AmazonSearchBook(string title, string author)
            BookInfo result = null;

            string authorTrim = "";

            // Initials such as " J." are detected so the space in front of them can be removed.
            // NOTE(review): the same pattern is built twice (instance and static call).
            Regex regex = new Regex(@"( [A-Z]\.)", RegexOptions.Compiled);
            Match match = Regex.Match(author, @"( [A-Z]\.)", RegexOptions.Compiled);

            if (match.Success)
                // NOTE(review): every iteration starts again from the untouched "author", so only
                // the replacement for the last initial survives when there are several — confirm intent.
                foreach (Match m in regex.Matches(author))
                    authorTrim = author.Replace(m.Value, m.Value.Trim());
                authorTrim = author;
            // Drop a trailing parenthesised part (everything from the first " (") from the title.
            if (title.IndexOf(" (") >= 0)
                title = title.Substring(0, title.IndexOf(" ("));

            string searchUrl = @"" +
                               Uri.EscapeDataString(title + " " + authorTrim + " kindle edition");

            HAP.HtmlDocument searchDoc = new HAP.HtmlDocument();
            HAP.HtmlNode node = searchDoc.DocumentNode.SelectSingleNode("//li[@id='result_0']");
            //At least attempt to verify it might be the same book?
            //Ignore case of title
            if (node != null && node.InnerText.IndexOf(title, StringComparison.OrdinalIgnoreCase) >= 0)
                string foundASIN = node.GetAttributeValue("data-asin", "");
                node = node.SelectSingleNode(".//div/div/div/div[@class='a-fixed-left-grid-col a-col-right']/div/a");
                if (node != null)
                    // The result keeps the caller's original author string, not the trimmed one.
                    result           = new BookInfo(node.InnerText, author, foundASIN);
                    result.amazonUrl = node.GetAttributeValue("href", ""); // Grab the true link for good measure

Beispiel #21
        /// <summary>
        /// Sets <c>IsIgnored</c> on the post depending on whether the post node's "class"
        /// attribute contains "ignored" (case-insensitive).
        /// </summary>
        /// <param name="post">The metadata object to update.</param>
        /// <param name="postNode">The post HTML node.</param>
        /// <returns>The same <paramref name="post"/> instance, to allow chaining.</returns>
        private static ThreadPostMetadata ParseIsIgnored(this ThreadPostMetadata post, HtmlNode postNode)
        {
            var classAttribute = postNode.GetAttributeValue("class", string.Empty);

            // Ordinal, case-insensitive search instead of ToLower(): lower-casing depends on the
            // current culture (under Turkish rules 'I' does not lower-case to 'i').
            post.IsIgnored = classAttribute.IndexOf("ignored", System.StringComparison.OrdinalIgnoreCase) >= 0;

            return post;
        }
Beispiel #22
 /// <summary>
 /// Adds a hyperlink for the node's "href" to the paragraph of <paramref name="section"/>.
 /// Targets starting with '#' become bookmark links (without the '#'); everything else
 /// becomes a web link.
 /// </summary>
 /// <param name="section">The document object whose paragraph receives the hyperlink.</param>
 /// <param name="node">The anchor node providing the "href" attribute.</param>
 /// <returns>The created hyperlink.</returns>
 private static DocumentObject AddHyperlink(DocumentObject section, HtmlNode node)
 {
     string target = node.GetAttributeValue("href", "");
     bool isBookmark = target.StartsWith("#");

     string linkName = isBookmark ? target.Substring(1) : target;
     HyperlinkType linkType = isBookmark ? HyperlinkType.Bookmark : HyperlinkType.Web;

     Hyperlink created = GetParagraph(section).AddHyperlink(linkName, linkType);
     return created;
 }
Beispiel #23
        /// <summary>
        /// Extracts the project id from the fifth '/'-separated segment of the node's "href".
        /// </summary>
        /// <param name="node">The link node.</param>
        /// <returns>
        /// The id converted from the fifth segment; 0 when the node has no "href", the address has
        /// fewer than five segments, or the fifth segment is "users".
        /// </returns>
        private static int ParseProjectId(HtmlNode node)
        {
            var link = node.GetAttributeValue("href", null);
            if (link == null)
            {
                // No href at all: nothing to parse.
                return 0;
            }

            var segments = link.Split('/');

            // Index 4 is only valid with at least five segments; the previous ">= 4" guard let
            // four-segment addresses through and then failed on the indexer.
            if (segments.Length > 4 && segments[4] != "users")
            {
                return Convert.ToInt32(segments[4]);
            }

            return 0;
        }
        /// <summary>
        /// Converts an external link node into a numbered reference marker. The original "href" is
        /// captured, then the node is rewritten to show "[index]", point at the reference anchor
        /// and carry a back-link name.
        /// </summary>
        /// <param name="externalReferenceNode">The link node to rewrite in place.</param>
        /// <param name="index">The reference number.</param>
        /// <returns>The reference holding the index and the original URL.</returns>
        private static ExternalReference GetExternalReference(HtmlNode externalReferenceNode, int index)
        {
            // Capture the target before the attribute is overwritten below.
            string originalUrl = externalReferenceNode.GetAttributeValue("href", string.Empty);
            string anchorId = GetExternalReferenceId(index);

            externalReferenceNode.InnerHtml = "[" + index + "]";
            externalReferenceNode.SetAttributeValue("href", "#" + anchorId);
            externalReferenceNode.SetAttributeValue("name", anchorId + BackLinkReferenceIdSuffix);

            return new ExternalReference
            {
                Index = index,
                Url = originalUrl
            };
        }
		/// <summary>
		/// Reads an attribute holding seconds since 1970-01-01 00:00:00 and converts it to a DateTime.
		/// </summary>
		/// <param name="node">The node to read from.</param>
		/// <param name="attribute">Name of the attribute that holds the epoch seconds.</param>
		/// <returns>
		/// The epoch plus the parsed seconds, or <c>DateTime.Now</c> when the attribute is missing or empty.
		/// </returns>
		/// <exception cref="FormatException">Thrown when the attribute value is not a number.</exception>
		protected DateTime RetrieveToDateTime( HtmlNode node , string attribute ) {
			var value = node.GetAttributeValue( attribute , string.Empty );

			if ( string.IsNullOrEmpty( value ) ) {
				return DateTime.Now;
			}

			var epoch = new DateTime( 1970 , 1 , 1 , 0 , 0 , 0 );

			// Markup values are machine-readable, so parse them culture-independently; with the
			// current culture a value such as "1.5" is read as 15 where '.' is a group separator.
			var seconds = double.Parse( value , System.Globalization.CultureInfo.InvariantCulture );

			return epoch.AddSeconds( seconds );
		}
        /// <summary>
        /// Builds an item from its HTML node, reading the name, id, optional type prefix, level
        /// and paint.
        /// </summary>
        /// <param name="code">
        /// Node carrying the "data-name", "data-id" and "data-subtitle" attributes and, optionally,
        /// a child <c>div</c> with class "paint".
        /// </param>
        public Item(HtmlNode code)
        {
            htmlCode = code;
            name = code.GetAttributeValue("data-name", ""); // full item name from the source (may include type)
            id = code.GetAttributeValue("data-id", ""); // either a number or "stock"

            // "unusual x", "unique x", "strange x", etc. are all possibilities.
            // Split only when there is a separator: a name that is a known type on its own has
            // nothing after it, and the previous Remove(IndexOf(' ')) threw for that case.
            int separator = name.IndexOf(' ');
            if (separator >= 0 && itemTypes.Contains(name.Substring(0, separator)))
            {
                // extract the type from the full item name
                type = name.Substring(0, separator);
                // strip the type (and the separating space) from the full item name
                name = name.Substring(separator + 1);
            }

            level = code.GetAttributeValue("data-subtitle", "");

            // The paint is whatever follows the first ':' of the paint div's style; it stays
            // empty when the div is absent or its style contains no ':'.
            HtmlNode paintNode = code.SelectSingleNode("div[@class='paint']");
            if (paintNode == null)
            {
                paint = "";
            }
            else
            {
                string[] styleParts = paintNode.GetAttributeValue("style", "").Split(':');
                paint = styleParts.Length > 1 ? styleParts[1] : "";
            }
        }
 /// <summary>
 /// Parses the cell's inline "style" with the stylesheet parser and applies the colour it
 /// finds to the font of the worksheet cell at (<paramref name="rowId"/>, <paramref name="colId"/>).
 /// </summary>
 /// <param name="worksheet">The worksheet to style.</param>
 /// <param name="rowId">Row of the target cell.</param>
 /// <param name="colId">Column of the target cell.</param>
 /// <param name="cell">The HTML cell providing the "style" attribute.</param>
 private static void AddExCssStyle(this IXLWorksheet worksheet, int rowId, int colId, HtmlNode cell)
 {
     var inlineStyle = cell.GetAttributeValue("style", "");
     var parsed = new StylesheetParser().Parse(inlineStyle);

     // First simple selector whose element name is "color", if any.
     SimpleSelector colorSelector = parsed.RuleSets
         .SelectMany(ruleSet => ruleSet.Selectors)
         .SelectMany(selector => selector.SimpleSelectors)
         .FirstOrDefault(simple => simple.ElementName == "color");

     string colorName = "";
     if (colorSelector != null)
     {
         colorName = colorSelector.Child.Pseudo;
     }

     worksheet.Cell(rowId, colId).Style.Font.FontColor = XLColor.FromName(colorName);
 }
        /// <summary>
        /// Tries to build an <c>Image</c> from an <c>img</c> node (name compared case-insensitively).
        /// </summary>
        /// <param name="node">The candidate node.</param>
        /// <param name="image">
        /// The parsed image on success; null otherwise. Width and height are 0 when the
        /// corresponding attribute is missing or not an integer.
        /// </param>
        /// <returns>true when the node is an <c>img</c> with a non-empty "src"; otherwise false.</returns>
        private bool TryParseImage(HtmlNode node, out Image image)
        {
            image = null;

            if (node.Name.ToLowerInvariant() != "img") {
                return false;
            }

            const string missing = "";
            string source = node.GetAttributeValue("src", missing);
            if (source == missing) {
                return false;
            }

            // A failed TryParse leaves the dimension at 0, which is accepted as-is.
            int width;
            int.TryParse(node.GetAttributeValue("width", missing), out width);

            int height;
            int.TryParse(node.GetAttributeValue("height", missing), out height);

            image = new Image(source, width, height);
            return true;
        }
        // Builds the absolute URL of a manga page from a select option, relative to HostUri.
        // Returns null when the option has no (or an empty) "value" attribute.
        // NOTE(review): the arguments of String.Format are missing from this excerpt, so what
        // fills the five placeholders cannot be determined from here.
        protected override Uri GetFullPageUrl(HtmlNode selectOption, MangaBook mangaBook)
            string selectOptionValueAttribute = selectOption.GetAttributeValue("value", "");

            string pageUrl = String.Format("{0}/{1}/{2}/{3}/{4}.html",

            return String.IsNullOrEmpty(selectOptionValueAttribute) ? null : new Uri(HostUri, pageUrl);
        // Writes one HTML table cell into the worksheet at (rowId, colId): text, data type, an
        // optional column-span range and an optional font colour taken from the inline style.
        // Returns the cell's colspan (1 when the attribute is absent).
        public static int AddTd(this IXLWorksheet worksheet, int rowId, int colId, HtmlNode cell)
            // add text
            worksheet.Cell(rowId, colId).Value = cell.InnerText;

            // add data type
            worksheet.Cell(rowId, colId).DataType = GetDataType(cell.InnerText);

            // colspan merge
            // NOTE(review): only the range is created in this excerpt; the call that merges it is
            // presumably on a line that was lost — confirm.
            int colspan = cell.GetAttributeValue("colspan", 1);
            if (colspan > 1)
                var mergeUs = worksheet.Range(rowId, colId, rowId, colId + (colspan - 1));

            // Try style with CssStyleCollection
            var stylernater = CssStyleTools.Create();
            stylernater.Value = cell.GetAttributeValue("style", "");
            string color = stylernater["color"];
            if (!string.IsNullOrWhiteSpace(color))
                // The parsing below assumes the "rgb(r, g, b)" form: the first four characters and
                // the last character are dropped, and the rest is split on ','.
                // Any other colour notation (names, #hex) makes Substring or int.Parse throw.
                //var snot = "rgb(255, 0, 0)";
                var snot = color;
                var poo = snot.Substring(4, snot.Length-4);
                var foo = poo.Substring(0, poo.Length - 1);
                string[] noo = foo.Split(',');
                var goo = noo.Select(x => x.Trim());
                List<int> hoo = goo.Select(x => int.Parse(x)).ToList();
                worksheet.Cell(rowId, colId).Style.Font.FontColor = XLColor.FromArgb(hoo[0], hoo[1], hoo[2]); //.FromName(color);
            // YAAHGHGhghghgh. This is just as bad as ExCss!

            // add style?
            //worksheet.AddExCssStyle(rowId, colId, cell);

            return colspan;
 // Updates the visual properties from the CSS rules that apply to the element. When the element
 // has a "class" attribute, two selectors are added: "<name>.<class>" and ".<class>".
 // NOTE(review): the initializer of "list" is truncated in this excerpt, so any selectors it
 // starts with are unknown. The class attribute is used as one token even if it holds several
 // space-separated classes.
 public static TextVisualProperties Update(this TextVisualProperties properties, HtmlNode element, CSS css)
     var list = new List<string>
     string attributeValue = element.GetAttributeValue("class", string.Empty);
     if (!string.IsNullOrEmpty(attributeValue))
         list.Add(element.Name + "." + attributeValue);
         list.Add("." + attributeValue);
     return properties.UpdateCss(css, list.ToArray());
Beispiel #32
        //static int nLevels = 64;
        //static Size testPadding = new Size(32, 32);
        //static double hitThreshold = 0;
        //static int groupThreshold = 2;
        //static double scaleStep = 1.05;
        //static bool useMeanShiftGrouping = false;

        // Loads one sample per .bmp/.jpg image in the folder, using the matching "<image>_data.xml"
        // file to set the isolator, array block and aperture rectangles. Images without an XML
        // file are collected in missing_xml_samples.
        // NOTE(review): this excerpt is truncated — the file query has no terminator, the sample is
        // never added to "samples", the final loop has no body and the return statement is missing.
        public static List <PSM4TxSample> LoadSamples(string folder)
            // Load all samples from the given folder.
            List <PSM4TxSample> samples = new List <PSM4TxSample>(); List <string> missing_xml_samples = new List <string>();

            string[] files = Directory.EnumerateFiles(folder)
                             .Where(file => file.ToLower().EndsWith(".bmp") || file.ToLower().EndsWith(".jpg"))

            for (int k = 0; k < files.Length; k++)
                // The whole path is lower-cased, so it may not match on case-sensitive file systems.
                string img_file = files[k].ToLower();
                string xml_file = img_file.EndsWith("bmp") ? img_file.Replace(".bmp", "_data.xml") : img_file.Replace(".jpg", "_data.xml");

                if (!File.Exists(xml_file))
                    missing_xml_samples.Add(img_file); continue;

                PSM4TxSample sample = new PSM4TxSample(img_file);

                //  .// means descendants, which includes children of children (and so forth).
                //  ./ means direct children.
                // If an XPath starts with a / it becomes relative to the root of the document;
                // to make it relative to your own node, start it with ./.
                // NOTE(review): HtmlAgilityPack's SelectNodes/SelectSingleNode return null when
                // nothing matches, so a file missing any of these elements fails below — confirm
                // the XML files always contain them.
                HtmlAgilityPack.HtmlDocument       doc         = new HtmlAgilityPack.HtmlDocument(); doc.Load(xml_file);
                HtmlAgilityPack.HtmlNodeCollection shape_nodes = doc.DocumentNode.SelectNodes("//shape");
                foreach (HtmlAgilityPack.HtmlNode shape_node in shape_nodes)
                    HtmlAgilityPack.HtmlNode blocktext_node = shape_node.SelectSingleNode(".//blocktext");
                    HtmlAgilityPack.HtmlNode text_node      = blocktext_node.SelectSingleNode("./text");
                    HtmlAgilityPack.HtmlNode data_node      = shape_node.SelectSingleNode(".//data");
                    HtmlAgilityPack.HtmlNode extent_node    = data_node.SelectSingleNode("./extent");

                    // The rectangle is passed on as raw strings ("" when an attribute is absent).
                    string s_x = extent_node.GetAttributeValue("X", "");
                    string s_y = extent_node.GetAttributeValue("Y", "");
                    string s_w = extent_node.GetAttributeValue("Width", "");
                    string s_h = extent_node.GetAttributeValue("Height", "");

                    // The shape's text label decides which rectangle of the sample is set.
                    if (text_node.InnerText.ToLower() == "isolator")
                        sample.SetIsolator(s_x, s_y, s_w, s_h); //(787.18896484375, 1370.0)
                    else if (text_node.InnerText.ToLower() == "arrayblock")
                        sample.SetArrayBlock(s_x, s_y, s_w, s_h); //(1052.86828613281, 1201.8359375)
                    else if (text_node.InnerText.ToLower() == "aperture")
                        sample.SetAperture(s_x, s_y, s_w, s_h); //(209.156982421875, 1885.03271484375)


            // The list of images without an XML file is written to "missing_xml.txt" in the
            // current working directory.
            using (StreamWriter sw = new StreamWriter("missing_xml.txt"))
                for (int i = 0; i < missing_xml_samples.Count; i++)

Beispiel #33
        /// <summary>
        /// Fill an object and go through it's properties and fill them too.
        /// </summary>
        /// <param name="targetType">Type of object to want to fill. It should have atleast one property that defined XPath.</param>
        /// <param name="htmlDocument">If htmlDocument includes data , leave this parameter null. Else pass your specific htmldocument.</param>
        /// <returns>Returns an object of type targetType including Encapsulated data.</returns>
        public object GetEncapsulatedData(Type targetType, HtmlDocument htmlDocument = null)
            #region SettingPrerequisite

            if (targetType == null)
                throw new ArgumentNullException("Parameter targetType is null");

            HtmlDocument source = null;

            if (htmlDocument == null)
                source = OwnerDocument;
                source = htmlDocument;

            object targetObject;

            if (targetType.IsInstantiable() == false) // if it can not create instanse of T because of lack of constructor in type T.
                throw new MissingMethodException("Parameterless Constructor excpected for " + targetType.FullName);
                targetObject = Activator.CreateInstance(targetType);

            #endregion SettingPrerequisite

            #region targetObject_Defined_XPath
            if (targetType.IsDefinedAttribute(typeof(HasXPathAttribute)) == true) // Object has xpath attribute (Defined HasXPath)
                // Store list of properties that defined xpath attribute
                IEnumerable <PropertyInfo> validProperties = targetType.GetPropertiesDefinedXPath();
                if (validProperties.CountOfIEnumerable() == 0) // if no XPath property exist in type T while T defined HasXpath attribute.
                    throw new MissingXPathException("Type " + targetType.FullName +
                                                    " defined HasXPath Attribute but it does not have any property with XPath Attribte.");
                    // Fill targetObject variable Properties ( T targetObject )
                    foreach (PropertyInfo propertyInfo in validProperties)
                        // Get xpath attribute from valid properties
                        // for .Net old versions:
                        XPathAttribute xPathAttribute = (propertyInfo.GetCustomAttributes(typeof(XPathAttribute), false) as IList)[0] as XPathAttribute;

                        #region Property_IsNOT_IEnumerable
                        if (propertyInfo.IsIEnumerable() == false) // Property is None-IEnumerable
                            HtmlNode htmlNode = null;

                            // try to fill htmlNode based on XPath given
                                htmlNode = source.DocumentNode.SelectSingleNode(xPathAttribute.XPath);
                            catch // if it can not select node based on given xpath
                                throw new NodeNotFoundException("Cannot find node with giving XPath to bind to " + propertyInfo.PropertyType.FullName + " " + propertyInfo.Name);

                            if (htmlNode == null)
                                throw new NodeNotFoundException("Cannot find node with givig XPath to bind to " +
                                                                propertyInfo.PropertyType.FullName + " " + propertyInfo.Name);

                            #region Property_Is_HasXPath_UserDefinedClass
                            // Property is None-IEnumerable HasXPath-user-defined class
                            if (propertyInfo.PropertyType.IsDefinedAttribute(typeof(HasXPathAttribute)) == true)
                                HtmlDocument innerHtmlDocument = new HtmlDocument();


                                object o = GetEncapsulatedData(propertyInfo.PropertyType, innerHtmlDocument);

                                propertyInfo.SetValue(targetObject, o, null);
                            #endregion Property_Is_HasXPath_UserDefinedClass

                            #region Property_Is_SimpleType
                            // Property is None-IEnumerable value-type or .Net class or user-defined class.
                            // AND does not deifned xpath and shouldn't have property that defined xpath.
                                string result = string.Empty;

                                if (xPathAttribute.AttributeName == null) // It target None-IEnumerable value of HTMLTag
                                    result = Tools.GetNodeValueBasedOnXPathReturnType <string>(htmlNode, xPathAttribute);
                                else // It target None-IEnumerable attribute of HTMLTag
                                    result = htmlNode.GetAttributeValue(xPathAttribute.AttributeName, null);

                                if (result == null)
                                    throw new NodeAttributeNotFoundException("Can not find " +
                                                                             xPathAttribute.AttributeName + " Attribute in " + htmlNode.Name +
                                                                             " related to " + propertyInfo.PropertyType.FullName + " " + propertyInfo.Name);

                                object resultCastedToTargetPropertyType;

                                    resultCastedToTargetPropertyType = Convert.ChangeType(result, propertyInfo.PropertyType);
                                catch (FormatException)
                                    throw new FormatException("Can not convert Invalid string to " + propertyInfo.PropertyType.FullName + " " + propertyInfo.Name);
                                catch (Exception ex)
                                    throw new Exception("Unhandled Exception : " + ex.Message);

                                propertyInfo.SetValue(targetObject, resultCastedToTargetPropertyType, null);
                            #endregion Property_Is_SimpleType
                        #endregion Property_IsNOT_IEnumerable

                        #region Property_Is_IEnumerable
                        else // Property is IEnumerable<T>
                            IList <Type> T_Types = propertyInfo.GetGenericTypes() as IList <Type>; // Get T type

                            if (T_Types == null || T_Types.Count == 0)
                                throw new ArgumentException(propertyInfo.Name + " should have one generic argument.");

                            else if (T_Types.Count > 1)
                                throw new ArgumentException(propertyInfo.Name + " should have one generic argument.");

                            else if (T_Types.Count == 1) // It is NOT something like Dictionary<Tkey , Tvalue>
                                HtmlNodeCollection nodeCollection;

                                // try to fill nodeCollection based on given xpath.
                                    nodeCollection = source.DocumentNode.SelectNodes(xPathAttribute.XPath);
                                    throw new NodeNotFoundException("Cannot find node with givig XPath to bind to " + propertyInfo.PropertyType.FullName + " " + propertyInfo.Name);

                                if (nodeCollection == null || nodeCollection.Count == 0)
                                    throw new NodeNotFoundException("Cannot find node with givig XPath to bind to " + propertyInfo.PropertyType.FullName + " " + propertyInfo.Name);

                                IList result = T_Types[0].CreateIListOfType();

                                #region Property_Is_IEnumerable<HasXPath-UserDefinedClass>
                                if (T_Types[0].IsDefinedAttribute(typeof(HasXPathAttribute)) == true) // T is IEnumerable HasXPath-user-defined class (T type Defined XPath properties)
                                    foreach (HtmlNode node in nodeCollection)
                                        HtmlDocument innerHtmlDocument = new HtmlDocument();

                                        object o = GetEncapsulatedData(T_Types[0], innerHtmlDocument);

                                #endregion Property_Is_IEnumerable<HasXPath-UserDefinedClass>

                                #region Property_Is_IEnumerable<SimpleClass>
                                else // T is value-type or .Net class or user-defined class ( without xpath )
                                    if (xPathAttribute.AttributeName == null) // It target value
                                            result = Tools.GetNodesValuesBasedOnXPathReturnType(nodeCollection, xPathAttribute, T_Types[0]);
                                        catch (FormatException)
                                            throw new FormatException("Can not convert Invalid string in node collection to " + T_Types[0].FullName + " " + propertyInfo.Name);
                                        catch (Exception ex)
                                            throw new Exception("Unhandled Exception : " + ex.Message);
                                    else // It target attribute
                                        foreach (HtmlNode node in nodeCollection)
                                            string nodeAttributeValue = node.GetAttributeValue(xPathAttribute.AttributeName, null);
                                            if (nodeAttributeValue == null)
                                                throw new NodeAttributeNotFoundException("Can not find " + xPathAttribute.AttributeName + " Attribute in " + node.Name + " related to " +
                                                                                         propertyInfo.PropertyType.FullName + " " + propertyInfo.Name);

                                            object resultCastedToTargetPropertyType;

                                                resultCastedToTargetPropertyType = Convert.ChangeType(nodeAttributeValue, T_Types[0]);
                                            catch (FormatException) // if it can not cast result(string) to type of property.
                                                throw new FormatException("Can not convert Invalid string to " + T_Types[0].FullName + " " + propertyInfo.Name);
                                            catch (Exception ex)
                                                throw new Exception("Unhandled Exception : " + ex.Message);

                                #endregion Property_Is_IEnumerable<SimpleClass>

                                if (result == null || result.Count == 0)
                                    throw new Exception("Cannot fill " + propertyInfo.PropertyType.FullName + " " + propertyInfo.Name + " because it is null.");

                                propertyInfo.SetValue(targetObject, result, null);
                        #endregion Property_IsNOT_IEnumerable

            #endregion targetObject_Defined_XPath

            #region targetObject_NOTDefined_XPath
            else // Object doesen't have xpath attribute
                throw new MissingXPathException("Type T must define HasXPath attribute and include properties with XPath attribute.");
            #endregion targetObject_NOTDefined_XPath
Beispiel #34
        /*private void downloadGamefrontMap(string mapFolder, Map m, string downloadUrl)
         * {
         *  // Download mod listing page
         *  string tempMainPagePath = Path.Combine(TempDir, mapFolder + "GamefrontMainPage.html");
         *  client = new MapInstallClient(m);
         *  client.DownloadFile(downloadUrl, tempMainPagePath);
         *  // TODO: Parse correct page element for download link
         *  HtmlAgilityPack.HtmlDocument gamefrontMainPage = new HtmlAgilityPack.HtmlDocument();
         *  gamefrontMainPage.LoadHtml(File.ReadAllText(tempMainPagePath));
         *  HtmlAgilityPack.HtmlNode downloadButton = gamefrontMainPage.GetElementbyId("downloadmirrorstoggle");
         *  string downloadPageUrl = Url.Combine(moddbBaseUrl, downloadButton.GetAttributeValue("href", ""));
         * }*/

        /// <summary>
        /// Downloads a map through its ModDB pages: saves the listing page, follows the link found on the
        /// "downloadmirrorstoggle" element to the download page, then starts an asynchronous download of an
        /// anchor whose text matches "download name.ext" and shows the progress controls.
        /// </summary>
        /// <param name="mapFolder">Prefix used when naming the temporary HTML files inside TempDir.</param>
        /// <param name="m">Map being installed; passed to the download client and shown in the status label.</param>
        /// <param name="downloadUrl">URL of the map's listing page.</param>
        // NOTE(review): this excerpt shows no braces and no loop exit, so the block structure is implied by
        // indentation only — confirm against the full file.
        private void DownloadModdbMap(string mapFolder, Map m, string downloadUrl)
            // Download mod listing page
            string tempMainPagePath = Path.Combine(TempDir, mapFolder + "ModdbMainPage.html");

            // client is not declared in this method; it is replaced with a new client on every call.
            client = new MapInstallClient(m);
            client.DownloadFile(downloadUrl, tempMainPagePath);

            // Scan for download button and retrieve link
            // NOTE(review): no Load/LoadHtml call on moddbMainPage is visible here, so the document would be
            // empty as written — confirm that tempMainPagePath is loaded before the lookup.
            HtmlAgilityPack.HtmlDocument moddbMainPage = new HtmlAgilityPack.HtmlDocument();
            HtmlAgilityPack.HtmlNode downloadButton = moddbMainPage.GetElementbyId("downloadmirrorstoggle");
            // NOTE(review): downloadButton is dereferenced without a null check — confirm the element always exists.
            // A missing href attribute yields "" as the relative part.
            string downloadPageUrl = Url.Combine(ModdbBaseUrl, downloadButton.GetAttributeValue("href", ""));

            // Download download page
            string tempDownloadPagePath = Path.Combine(TempDir, mapFolder + "DownloadPage.html");

            client.DownloadFile(downloadPageUrl, tempDownloadPagePath);

            // Scan for a tags in download page
            // NOTE(review): as above, no load of tempDownloadPagePath into moddbDownloadPage is visible — confirm.
            HtmlAgilityPack.HtmlDocument moddbDownloadPage = new HtmlAgilityPack.HtmlDocument();
            IEnumerable <HtmlAgilityPack.HtmlNode> anchors = moddbDownloadPage.DocumentNode.Descendants("a");

            if (anchors != null)
                // Regex matches text displayed on page for correct download link.
                // Group 1 captures the file name and group 2 the extension.
                Regex downloadFile = new Regex(@"download (.*)\.(.*)");

                foreach (HtmlNode a in anchors)
                    Match match = downloadFile.Match(a.InnerText);

                    if (match.Success)
                        // Rebuild "name.ext" from the captured groups and place it in TempDir.
                        string filename      = match.Groups[1].ToString();
                        string fileExtension = match.Groups[2].ToString();
                        string destFile      = Path.Combine(TempDir, filename + "." + fileExtension);

                        string realDownloadUrl = Url.Combine(ModdbBaseUrl, a.GetAttributeValue("href", ""));

                        // Subscribe the completion and progress handlers, then start the asynchronous download.
                        client.DownloadFileCompleted   += client_DownloadFileCompleted;
                        client.DownloadProgressChanged += client_DownloadProgressChanged;
                        //MessageBox.Show("File will start downloading.", "Downloading " + m.Name);
                        client.DownloadFileAsync(new Uri(realDownloadUrl), destFile);
                        // Record the destination path on the client (assigned after the download has been started).
                        client.downloadedFile = destFile;

                        // Show progress bar
                        progBarMapDownload.Visible = true;
                        // Show map name
                        lblMapInstalling.Visible = true;
                        lblMapInstalling.Text    = m.Name;
                        lblInstallStatus.Visible = true;
                        lblInstallStatus.Text    = "Downloading map...";

                        // Exit this now as we have found and processed our match
                        // Any other matches in the HTML are irrelevant
Beispiel #35
        /// <summary>
        /// Walks the rows of the user table under the element with id "regPage", follows each row's detail
        /// link, reads the user's fields from the detail page's table, and passes them to StoreUserData.
        /// </summary>
        /// <param name="documentText">
        /// NOTE(review): not referenced in the visible lines; presumably the HTML that should be loaded
        /// into htmlDocument — confirm.
        /// </param>
        void ExtractUsers(string documentText)
                // NOTE(review): no LoadHtml call is visible for htmlDocument, and none of the XPath results
                // below are null-checked before use — confirm against the full file.
                HtmlAgilityPack.HtmlDocument htmlDocument = new HtmlAgilityPack.HtmlDocument();

                // bloodDonor and linkPage are assigned here but not read again in the visible code.
                HtmlAgilityPack.HtmlNode bloodDonor = htmlDocument.DocumentNode.SelectSingleNode("//*[@id='regmain']");

                HtmlAgilityPack.HtmlNode linkPage = htmlDocument.DocumentNode.SelectSingleNode("//*[@id='regPage']/div/div/div/a[4]");

                //linkPageCount = linkPage.InnerLength;

                HtmlAgilityPack.HtmlNode userTable = htmlDocument.DocumentNode.SelectSingleNode("//*[@id='regPage']/div/table");

                HtmlAgilityPack.HtmlNodeCollection userTableCount = userTable.SelectNodes("./tr");

                // XPath positions are 1-based; starting at 2 presumably skips a header row — confirm.
                for (int k = 2; k <= userTableCount.Count(); k++)
                    // The anchor in the sixth cell of the row carries the detail link in its onclick attribute.
                    HtmlAgilityPack.HtmlNode moreBtn = userTable.SelectSingleNode("./tr[" + k + "]/td[6]/a");

                    // NOTE(review): onclick falls back to null when the attribute is missing, in which case
                    // Split below would throw. The link is the second piece of the value split on 'b'.
                    string   linkBtn  = moreBtn.GetAttributeValue("onclick", null);
                    string[] mainLink = linkBtn.Split('b');
                    string   moreLink = mainLink[1];
                    using (WebClient client = new WebClient())
                        // NOTE(review): the empty-string prefix looks like a missing or redacted base URL — confirm.
                        var link = client.DownloadString("" + moreLink);
                        // NOTE(review): no LoadHtml(link) call is visible for userDocument — confirm.
                        HtmlAgilityPack.HtmlDocument userDocument = new HtmlAgilityPack.HtmlDocument();

                        // Each field below is read from the second cell of a fixed row of this table.
                        HtmlAgilityPack.HtmlNode selectTable = userDocument.DocumentNode.SelectSingleNode("//*[@id='search']/form/table");

                        //User Name
                        HtmlAgilityPack.HtmlNode userName = selectTable.SelectSingleNode("./tr[1]/td[2]");
                        string Name = userName.InnerText;

                        //User Email
                        // The cell's markup is split on '=', pieces 1 and 2 are re-joined with '=', and the text
                        // "border" is removed; presumably this isolates an image source value — confirm.
                        HtmlAgilityPack.HtmlNode userEmail = selectTable.SelectSingleNode("./tr[2]/td[2]");
                        string[] Mail    = userEmail.InnerHtml.Split('=');
                        string   addMail = Mail[1] + "=" + Mail[2];
                        string   Email   = addMail.Replace("border", "");
                        //function to convert png image into jpg

                        //User BloodGroup
                        HtmlAgilityPack.HtmlNode userBloodGroup = selectTable.SelectSingleNode("./tr[3]/td[2]");
                        string BloodGroup = userBloodGroup.InnerText;

                        //User Gender
                        HtmlAgilityPack.HtmlNode userGender = selectTable.SelectSingleNode("./tr[4]/td[2]");
                        string Gender = userGender.InnerText;

                        //User Age
                        // The text "Years" is stripped before parsing; Int32.Parse throws if the rest is not an integer.
                        HtmlAgilityPack.HtmlNode userAge = selectTable.SelectSingleNode("./tr[5]/td[2]");
                        int Age = Int32.Parse(userAge.InnerText.Replace("Years", ""));

                        //User City
                        HtmlAgilityPack.HtmlNode userCity = selectTable.SelectSingleNode("./tr[6]/td[2]");
                        string City = userCity.InnerText;

                        //User Mobile Number
                        // Same split/re-join/strip treatment as the email cell.
                        HtmlAgilityPack.HtmlNode userMobile = selectTable.SelectSingleNode("./tr[7]/td[2]");
                        string[] num          = userMobile.InnerHtml.Split('=');
                        string   addNum       = num[1] + "=" + num[2];
                        string   mobileNumber = addNum.Replace("border", "");
                        //function to convert png image into jpg

                        //User Land Line Number
                        // Same split/re-join/strip treatment as the email cell.
                        HtmlAgilityPack.HtmlNode userLandLine = selectTable.SelectSingleNode("./tr[8]/td[2]");
                        string[] landNum     = userLandLine.InnerHtml.Split('=');
                        string   addLandNum  = landNum[1] + "=" + landNum[2];
                        string   landLineNum = addLandNum.Replace("border", "");
                        //Function to convert png image into jpg

                        //User Last Donation Date
                        HtmlAgilityPack.HtmlNode userLastDonationDate = selectTable.SelectSingleNode("./tr[9]/td[2]");
                        string LastDonationDate = userLastDonationDate.InnerText;

                        //Store Data in DataBase
                        StoreUserData(Name, Email, BloodGroup, Gender, Age, City, mobileNumber, landLineNum, LastDonationDate);
Beispiel #36
            /// <summary>
            /// Builds a parsed link from an anchor node: records its href and text, classifies it as a
            /// deviantart and/or "source" link, extracts a number from the text, and, for source links that
            /// are not deviantart links, scans the siblings after the parent for an image link to add.
            /// </summary>
            /// <param name="node">Anchor node whose href attribute and inner text are read.</param>
            // NOTE(review): braces and any bare else/break lines are not visible in this excerpt; the block
            // structure is implied by indentation only — confirm against the full file.
            public ParsedLink(HtmlAgilityPack.HtmlNode node)
                // A missing href is stored as the empty string.
                Url.Add(node.GetAttributeValue("href", ""));
                InnerText = node.InnerText;

                // NOTE(review): Url is used as a collection (Add, indexer); if it is a list of strings this
                // tests for an element equal to "deviantart", not a substring of the href — confirm intent.
                DeviantArt = Url.Contains("deviantart");
                Uri uriResult;

                // The link counts as valid only if it is an absolute http or https URI.
                bool validLinkResult = Uri.TryCreate(Url[0], UriKind.Absolute, out uriResult) &&
                                       (uriResult.Scheme == Uri.UriSchemeHttp || uriResult.Scheme == Uri.UriSchemeHttps);

                // NOTE(review): presumably an else separated the two assignments below — confirm.
                if (validLinkResult)
                    JValid = uriResult.Host.Contains("deviantart");
                    JValid = false;

                // Logically equivalent to: JValid || InnerText.Contains("ource").
                Valid = (JValid && InnerText.Contains("ource")) || InnerText.Contains("ource") || JValid;

                // Source is the first run of digits in the link text, or -1 when none can be parsed.
                Match match = Regex.Match(InnerText, @"(\d+)");

                if (!int.TryParse(match.Groups[1].Value, out Source))
                    Source = -1;

                // For a "source" link that is not a deviantart link, walk the siblings after the parent.
                if (Valid && !JValid && !DeviantArt)
                    var parent = node.ParentNode;

                    var sibling = parent.NextSibling;
                    while (sibling != null)
                        // NOTE(review): the body of this check is not visible; presumably it stops the scan — confirm.
                        if (sibling.Name == "a")

                        // Check to see if we've moved onto another source's link
                        if (sibling.Name == "b")
                            var links = sibling.Descendants("a");

                            // NOTE(review): the body of this check is not visible; presumably it stops the scan — confirm.
                            if (links.Any(l => l.InnerHtml.Contains("ource")))

                        if (sibling.Name == "div")
                            var links = sibling.Descendants("a");

                            // First anchor whose lower-cased href contains one of the image extensions anywhere in it.
                            var possibleImageLinks = links.FirstOrDefault(l => new [] { "jpg", "jpeg", "png", "gif" }.Any(e => l.GetAttributeValue("href", "").ToLower().Contains(e)));

                            if (possibleImageLinks != null)
                                // Remember the original href the first time an alternative is proposed.
                                if (!AlternativeProposed)
                                    OriginalUrl = Url[0];
                                Url.Add(possibleImageLinks.GetAttributeValue("href", ""));
                                AlternativeProposed = true;
                                JValid = true;

                        sibling = sibling.NextSibling;
        /// <summary>
        /// Creates an instance of <typeparamref name="T"/> and fills every property that defines an XPath
        /// attribute with data selected from an HTML document.
        /// </summary>
        /// <typeparam name="T">Type of the object to fill. It must define the HasXPath attribute and should have at least one property that defines XPath.</typeparam>
        /// <param name="htmlDocument">Document to read from. When null, OwnerDocument is used.</param>
        /// <returns>An object of type T holding the encapsulated data.</returns>
        /// <exception cref="NotImplementedException">
        /// Thrown when T does not define the HasXPath attribute, or when an IEnumerable property does not
        /// have exactly one generic type argument.
        /// </exception>
        // NOTE(review): braces and bare else lines are not visible in this excerpt; the block structure is
        // implied by indentation and the region markers only — confirm against the full file.
        public T GetEncapsulatedData <T>(HtmlDocument htmlDocument = null)
            HtmlDocument source = null;

            // NOTE(review): presumably an else separated the two assignments below — confirm.
            if (htmlDocument == null)
                source = OwnerDocument;
                source = htmlDocument;

            T targetObject = Activator.CreateInstance <T>();

            #region targetObject_Defined_XPath
            if (Tools.IsDefinedAttr(typeof(T), (typeof(HasXPathAttribute))) == true) // Object has the HasXPath attribute
                // Store list of properties that define the XPath attribute
                IEnumerable <PropertyInfo> validProperties = Tools.GetPropertiesDefinedXPath(typeof(T));

                foreach (PropertyInfo propertyInfo in validProperties)
                    XPathAttribute xPathAttribute = (propertyInfo.GetCustomAttributes(typeof(XPathAttribute), false) as IList)[0] as XPathAttribute; // Get the XPath attribute of the property

                    #region Property_IsNOT_IEnumerable
                    if (Tools.IsIEnumerable(propertyInfo) == false) // Property is not IEnumerable
                        // NOTE(review): htmlNode is not null-checked before it is used below — confirm the XPath always matches.
                        HtmlNode htmlNode = source.DocumentNode.SelectSingleNode(xPathAttribute.XPath);

                        #region Property_Is_HasXPath_UserDefinedClass
                        if (Tools.IsDefinedAttr(propertyInfo.PropertyType, (typeof(HasXPathAttribute))) == true) // Property is a non-IEnumerable user-defined class with HasXPath
                            // NOTE(review): no load of htmlNode's markup into innerHtmlDocument is visible here — confirm.
                            HtmlDocument innerHtmlDocument = new HtmlDocument();

                            // Call GetEncapsulatedData<PropertyType> through reflection, since the type is only known at run time.
                            MethodInfo getEncapsulatedData = Tools.GetMethodByItsName(typeof(HtmlNode), "GetEncapsulatedData").MakeGenericMethod(propertyInfo.PropertyType);
                            object     o = getEncapsulatedData.Invoke(innerHtmlDocument.DocumentNode, new object[] { innerHtmlDocument });

                            propertyInfo.SetValue(targetObject, o, null);
                        #endregion Property_Is_HasXPath_UserDefinedClass

                        #region Property_Is_SimpleType
                        // Property is a non-IEnumerable value type, .NET class, or user-defined class (does not define XPath and should not have properties that define XPath)
                            string result = string.Empty;

                            if (xPathAttribute.AttributeName == null) // Targets the value of the HTML tag
                                result = Tools.GetNodeValueBasedOnXPathReturnType <string>(htmlNode, xPathAttribute);
                            else // Targets an attribute of the HTML tag; a missing attribute yields the placeholder text
                                result = htmlNode.GetAttributeValue(xPathAttribute.AttributeName, "Html Tag Attribute Not Specified");

                            // Convert the string to the property's type before assigning it.
                            propertyInfo.SetValue(targetObject, Convert.ChangeType(result, propertyInfo.PropertyType), null);
                        #endregion Property_Is_SimpleType
                    #endregion Property_IsNOT_IEnumerable

                    #region Property_Is_IEnumerable
                    else // Property is IEnumerable<T>
                        IList <Type> T_Types = Tools.GetGenericTypes(propertyInfo) as IList <Type>; // Get the generic type argument(s)

                        if (T_Types == null || T_Types.Count == 0)
                            throw new NotImplementedException();

                        else if (T_Types.Count > 1)
                            throw new NotImplementedException();

                        else if (T_Types.Count == 1) // It is NOT something like Dictionary<TKey, TValue>
                            // NOTE(review): nodeCollection is not null-checked before the loops below — confirm the XPath always matches.
                            HtmlNodeCollection nodeCollection = source.DocumentNode.SelectNodes(xPathAttribute.XPath);

                            IList result = Tools.CreateIListOfType(T_Types[0]);

                            #region Property_Is_IEnumerable<HasXPath-UserDefinedClass>
                            if (Tools.IsDefinedAttr(T_Types[0], typeof(HasXPathAttribute)) == true) // Element type is a user-defined class with HasXPath
                                foreach (HtmlNode node in nodeCollection)
                                    // NOTE(review): neither a load of node's markup into innerHtmlDocument nor an
                                    // add of o to result is visible in this loop — confirm.
                                    HtmlDocument innerHtmlDocument = new HtmlDocument();

                                    MethodInfo getEncapsulatedData = Tools.GetMethodByItsName(typeof(HtmlNode), "GetEncapsulatedData").MakeGenericMethod(T_Types[0]);
                                    object     o = getEncapsulatedData.Invoke(innerHtmlDocument.DocumentNode, new object[] { innerHtmlDocument });

                            #endregion Property_Is_IEnumerable<HasXPath-UserDefinedClass>

                            #region Property_Is_IEnumerable<SimpleClass>
                            else // Element type is a value type, .NET class, or user-defined class (without XPath)
                                if (xPathAttribute.AttributeName == null) // Targets node values
                                    result = Tools.GetNodesValuesBasedOnXPathReturnType(nodeCollection, xPathAttribute, T_Types[0]);
                                else // Targets an attribute; a missing attribute yields the placeholder text
                                    foreach (HtmlNode node in nodeCollection)
                                        result.Add(Convert.ChangeType(node.GetAttributeValue(xPathAttribute.AttributeName, "Html Tag Attribute Not Specified"), T_Types[0]));
                            #endregion Property_Is_IEnumerable<SimpleClass>

                            propertyInfo.SetValue(targetObject, result, null);
                    #endregion Property_IsNOT_IEnumerable

            #endregion targetObject_Defined_XPath

            #region targetObject_NOTDefined_XPath
            else // Object doesn't have the HasXPath attribute
                throw new NotImplementedException();
            #endregion targetObject_NOTDefined_XPath