/// <summary>
/// Fetches the content for <paramref name="Url"/> through <c>HttpGenericWebRequest.doGet</c>,
/// parses it as HTML and collects every relative href that carries a query string,
/// rebuilt as a full URL based on <paramref name="Url"/>.
/// </summary>
/// <param name="Url">The url target</param>
/// <returns>
/// The rebuilt URLs. The list is empty when the document contains no anchor with an
/// href attribute or when no href qualifies.
/// </returns>
public static List<String> ParseLinkElements(String Url)
{
    List<String> urls = new List<String>();

    HttpGenericWebRequest genericWebRequest = new HttpGenericWebRequest();
    var document = new HtmlDocument();
    document.LoadHtml(genericWebRequest.doGet(Url));

    // Gate kept from the original: nothing is collected unless at least one <a href> exists.
    if (document.DocumentNode.SelectNodes("//a[@href]") == null)
    {
        return urls;
    }

    // The target URL without its query string. Relative links are resolved against this so
    // that a '/' inside the query cannot be mistaken for a path separator.
    string basePath = Url.Split('?')[0];
    string baseDirectory = basePath.Substring(0, basePath.LastIndexOf('/') + 1);

    // Once the gate passed, every element carrying an href is inspected (not only anchors).
    foreach (var link in document.DocumentNode.SelectNodes("//*[@href]"))
    {
        String hrefValue = link.GetAttributeValue("href", "");

        // Keep only hrefs that have a query string and do not contain "http" anywhere
        // (which also covers "https").
        if (String.IsNullOrEmpty(hrefValue) || hrefValue.IndexOf('?') < 0 || hrefValue.Contains("http"))
        {
            continue;
        }

        if (hrefValue.StartsWith("?", StringComparison.Ordinal))
        {
            // Query-only link: same page, different query string.
            urls.Add(basePath + hrefValue);
            continue;
        }

        // Swap only the last path segment for the href. The previous String.Replace call
        // rewrote every occurrence of that segment's text anywhere in the URL.
        urls.Add(baseDirectory + hrefValue);
    }

    return urls;
}
/// <summary>
/// Fetches the content for <paramref name="Url"/> through <c>HttpGenericWebRequest.doGet</c>,
/// parses it as HTML and builds one <c>NodeObjectForm</c> per form element found.
/// </summary>
/// <param name="Url">The url target</param>
/// <returns>
/// A list with one entry per form, each holding the input name/value pairs, the resolved
/// action URL and the method attribute. The list is empty when the document has no form.
/// </returns>
public static List<NodeObjectForm> ParseFormNodes(String Url)
{
    List<NodeObjectForm> nodeObjectFormList = new List<NodeObjectForm>();

    HttpGenericWebRequest genericWebRequest = new HttpGenericWebRequest();
    var document = new HtmlDocument();
    document.LoadHtml(genericWebRequest.doGet(Url));

    var formNodes = document.DocumentNode.SelectNodes("//form");
    if (formNodes == null)
    {
        return nodeObjectFormList;
    }

    // The target URL without its query string, and the part of it up to and including the
    // last '/'. Relative actions are appended to that directory part.
    string basePath = Url.Split('?')[0];
    string baseDirectory = basePath.Substring(0, basePath.LastIndexOf('/') + 1);

    foreach (var form in formNodes)
    {
        // A fresh dictionary per form, so entries never carry over between forms.
        Dictionary<String, String> fields = new Dictionary<String, String>();

        // NOTE(review): "//input" is an absolute XPath, so this appears to select the inputs
        // of the whole document rather than only this form's. Left unchanged because a
        // relative ".//input" depends on how the parser nests form children - confirm
        // against the HtmlAgilityPack version in use before narrowing it.
        var inputNodes = form.SelectNodes("//input");
        if (inputNodes != null)
        {
            foreach (HtmlNode input in inputNodes)
            {
                string name = input.GetAttributeValue("name", "");
                // Later inputs with the same name overwrite earlier ones.
                fields[name] = input.GetAttributeValue("value", "");
            }
        }

        string method = form.GetAttributeValue("method", "");
        string action = form.GetAttributeValue("action", "");

        string newUrl;
        if (action == "" || action == "#")
        {
            // No usable action: the form posts back to the page itself.
            newUrl = Url;
        }
        else if (action.StartsWith("http", StringComparison.Ordinal))
        {
            // Already an absolute URL.
            newUrl = action;
        }
        else
        {
            // Swap only the last path segment for the action. The previous String.Replace
            // call rewrote every occurrence of that segment's text anywhere in the URL.
            newUrl = baseDirectory + action;
        }

        nodeObjectFormList.Add(new NodeObjectForm
        {
            Dictionary = fields,
            Action = newUrl,
            Method = method
        });
    }

    return nodeObjectFormList;
}