/// <summary>
/// Builds the web client and form processor, then asks the processor for the
/// initial form.
/// </summary>
/// <param name="sourceUri">Location handed to the processor's GetForm call.</param>
/// <param name="xpath">XPath expression handed to the processor to locate the form.</param>
public HttpScraper(string sourceUri, string xpath)
{
    // NOTE(review): the meaning of the 'true' constructor argument is not
    // visible from here - confirm against FormProcessorWeb.
    FormProcessorWeb client = new FormProcessorWeb(true);
    client.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)";
    web = client;

    processor = new FormProcessor(web);

    // The initial form is always looked up in Nested query mode.
    form = processor.GetForm(sourceUri, xpath, FormQueryModeEnum.Nested);
}
/// <summary>
/// Replaces the currently held form with the supplied one.
/// </summary>
/// <param name="_form">The form to store. It is stored as-is; no null check is performed.</param>
public void SetForm(Form _form)
{
    this.form = _form;
}
/// <summary>
/// Submits the given form by forwarding its <c>elements</c> and <c>action</c>
/// members to the SubmitForm overload that accepts them separately.
/// </summary>
/// <param name="form">The form whose elements and action are submitted.</param>
/// <returns>Whatever document the underlying SubmitForm overload returns.</returns>
public HtmlDocument SubmitForm(Form form)
{
    var payloadNodes = form.elements;
    var target = form.action;

    return SubmitForm(payloadNodes, target);
}
/// <summary>
/// Looks up a form in an already-loaded document via the processor (always in
/// Nested query mode) and stores the result as the current form.
/// </summary>
/// <param name="doc">The document handed to the processor's GetForm call.</param>
/// <param name="url">The URL handed to the processor's GetForm call.</param>
/// <param name="xpath">XPath expression handed to the processor to locate the form.</param>
public void SetForm(HtmlDocument doc, string url, string xpath)
{
    // Whatever the processor returns is stored without further checks.
    Form located = processor.GetForm(doc, url, xpath, FormQueryModeEnum.Nested);
    form = located;
}
/// <summary>
/// Selects a form node from <paramref name="doc"/> with <paramref name="xpath"/>,
/// absolutizes it against <paramref name="url"/>, collects its field nodes
/// according to <paramref name="queryMode"/>, and wraps both in a
/// <see cref="Form"/>.
/// </summary>
/// <param name="doc">The document to search. Must not be null.</param>
/// <param name="url">The URL passed to AbsolutizeForm together with the form node.</param>
/// <param name="xpath">XPath expression that selects the form node.</param>
/// <param name="queryMode">
/// Nested: field nodes are selected relative to the form node.
/// Adjacent: field nodes are selected from the whole document.
/// </param>
/// <returns>
/// The constructed form, or <c>null</c> when the XPath matches no node, when
/// no field nodes are selected, or when <paramref name="queryMode"/> is
/// neither Nested nor Adjacent.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="doc"/> is null.
/// </exception>
public Form GetForm(HtmlDocument doc, string url, string xpath,
                    FormQueryModeEnum queryMode)
{
    // Fail with a descriptive exception instead of a bare NullReferenceException.
    if (doc == null)
    {
        throw new System.ArgumentNullException("doc");
    }

    HtmlNode formNode = doc.DocumentNode.SelectSingleNode(xpath);
    if (formNode == null)
    {
        return null;
    }

    // Absolutize the form's action attribute
    AbsolutizeForm(formNode, url);

    HtmlNodeCollection formNodes = null;
    if (queryMode == FormQueryModeEnum.Nested)
    {
        // The form properly contains its fields, so select relative to it.
        formNodes = formNode.SelectNodes(NodeSelectPath);
    }
    else if (queryMode == FormQueryModeEnum.Adjacent)
    {
        // The form is not properly the parent of all its nodes, so grab
        // every matching node in the entire document.
        // TODO: take the form node's position into account.
        formNodes = doc.DocumentNode.SelectNodes(NodeSelectPath);
    }

    // No field nodes selected (or unsupported query mode): nothing to build.
    if (formNodes == null)
    {
        return null;
    }

    return new Form(formNode, formNodes);
}