/// <summary>
/// Creates an <b>HtmlFormDefinition</b> from a single form match: copies the attributes parsed from the form's
/// attribute group and collects every named control found in the form's body group.
/// </summary>
/// <param name="formMatch">The regular expression match for one form; its attribute and body groups are read.</param>
/// <param name="pageHtml">The HTML of the page, stored on the returned form definition.</param>
/// <returns>The populated form definition.</returns>
/// <exception cref="System.Net.WebException">A matched control is not an input, select or textarea.</exception>
private static HtmlFormDefinition PopulateForm(Match formMatch, string pageHtml) {
    var form = new HtmlFormDefinition { PageHtml = pageHtml };

    // Copy across every attribute parsed from the form's attribute text.
    var attributeText = formMatch.Groups[RegexLibrary.ParseFormsAttributesGroup].Value;
    foreach (var pair in Utility.ParseAttributes(attributeText)) {
        form.attributes.Add(pair.Key, pair.Value);
    }

    // TODO: need to remove comments from form HTML before parsing out controls; commented out controls will be found!

    // Populate controls
    var controlRegex = RegexCache.Instance.Regex(RegexLibrary.ParseFormControls, RegexLibrary.ParseFormControlsOptions);
    var bodyText = formMatch.Groups[RegexLibrary.ParseFormsBodyGroup].Value;

    foreach (Match match in controlRegex.Matches(bodyText)) {
        var controlHtml = match.Value;
        var groups = match.Groups;
        HtmlFormControl parsed;

        if (groups[RegexLibrary.ParseFormControlsInputGroup].Value.Length > 0) {
            // Parse as a plain input first so its type can pick the specialised class.
            var input = new InputHtmlFormControl(controlHtml);

            switch (input.ControlType) {
                case InputHtmlFormControlType.Radio:
                    parsed = new InputRadioHtmlFormControl(controlHtml);
                    break;

                case InputHtmlFormControlType.CheckBox:
                    parsed = new InputCheckBoxHtmlFormControl(controlHtml);
                    break;

                default:
                    // Generic control
                    parsed = input;
                    break;
            }
        }
        else if (groups[RegexLibrary.ParseFormControlsSelectGroup].Value.Length > 0) {
            parsed = new SelectHtmlFormControl(controlHtml);
        }
        else if (groups[RegexLibrary.ParseFormControlsTextAreaGroup].Value.Length > 0) {
            parsed = new TextAreaHtmlFormControl(controlHtml);
        }
        else {
            throw new System.Net.WebException(string.Format(CultureInfo.CurrentCulture, NScrapeResources.UnsupportedHtmlControl, controlHtml));
        }

        // Controls without a name are not kept.
        if (parsed.Name == null) {
            continue;
        }

        form.controls.Add(parsed);
    }

    return form;
}
/// <summary>
/// Builds the request data to be used to submit an ASPX form.
/// </summary>
/// <param name="eventTargetValue">Contains the value for <b>__EVENTTARGET</b>, the control doing the submission.</param>
/// <param name="eventArgumentValue">Contains the value for <b>__EVENTARGUMENT</b>, any additional information.</param>
/// <returns>The request data in <b>application/x-www-form-urlencoded</b> format.</returns>
/// <remarks>
/// The supplied values are only applied to the <b>__EVENTTARGET</b> and <b>__EVENTARGUMENT</b> controls while the
/// request is built; both controls are reset to an empty string before this method exits, including when
/// building the request throws.
/// <para>See <see href="http://www.evagoras.com/2011/02/10/how-postback-works-in-asp-net/">How postback works in ASP.NET</see> for a good overview on the topic.</para>
/// </remarks>
protected string BuildAspxPostBackRequest(string eventTargetValue, string eventArgumentValue) {
    var eventTarget = Controls.SingleOrDefault(c => c.Name == EventTargetName) as InputHtmlFormControl;

    if (eventTarget == null) {
        // If __EVENTTARGET is not in the HTML (injected via JavaScript?), add it manually.
        eventTarget = new InputHtmlFormControl("<input type=\"hidden\" name=\"__EVENTTARGET\" id=\"__EVENTTARGET\" value=\"\" />");
        Controls.Add(eventTarget);
    }

    var eventArgument = Controls.SingleOrDefault(c => c.Name == EventArgumentName) as InputHtmlFormControl;

    if (eventArgument == null) {
        // If __EVENTARGUMENT is not in the HTML (injected via JavaScript?), add it manually.
        eventArgument = new InputHtmlFormControl("<input type=\"hidden\" name=\"__EVENTARGUMENT\" id=\"__EVENTARGUMENT\" value=\"\" />");
        Controls.Add(eventArgument);
    }

    // Most __doPostBack() examples found via Googling unescaped the event target parameter,
    // but some did not, so play it safe and see which kind we have
    string effectiveEventTarget;
    if (RegexCache.Instance.Regex(RegexLibrary.MatchDoPostBack, RegexLibrary.MatchDoPostBackOptions).IsMatch(Html)) {
        effectiveEventTarget = eventTargetValue.Replace('$', ':');
    }
    else {
        effectiveEventTarget = eventTargetValue;
    }

    try {
        eventTarget.Value = effectiveEventTarget;
        eventArgument.Value = eventArgumentValue;

        // Build the request using an empty string so that none of the buttons in the form are included.
        return BuildRequest(string.Empty);
    }
    finally {
        // Do not persist the values. Resetting in a finally block keeps a failed build from leaving
        // the postback values on the form's controls, where a later request would pick them up.
        eventTarget.Value = string.Empty;
        eventArgument.Value = string.Empty;
    }
}
/// <summary>
/// Builds the request data to be used to submit an ASPX form.
/// </summary>
/// <param name="eventTargetValue">Contains the value for <b>__EVENTTARGET</b>, the control doing the submission.</param>
/// <param name="eventArgumentValue">Contains the value for <b>__EVENTARGUMENT</b>, any additional information.</param>
/// <returns>The request data in <b>application/x-www-form-urlencoded</b> format.</returns>
/// <remarks>
/// The supplied values are only applied to the <b>__EVENTTARGET</b> and <b>__EVENTARGUMENT</b> controls while the
/// request is built; both controls are reset to an empty string before this method exits, including when
/// building the request throws.
/// <para>See <see href="http://www.evagoras.com/2011/02/10/how-postback-works-in-asp-net/">How postback works in ASP.NET</see> for a good overview on the topic.</para>
/// </remarks>
protected string BuildAspxPostBackRequest( string eventTargetValue, string eventArgumentValue ) {
    var eventTarget = Controls.SingleOrDefault( c => c.Name == EventTargetName ) as InputHtmlFormControl;

    if ( eventTarget == null ) {
        // If __EVENTTARGET is not in the HTML (injected via JavaScript?), add it manually.
        eventTarget = new InputHtmlFormControl( "<input type=\"hidden\" name=\"__EVENTTARGET\" id=\"__EVENTTARGET\" value=\"\" />" );
        Controls.Add( eventTarget );
    }

    var eventArgument = Controls.SingleOrDefault( c => c.Name == EventArgumentName ) as InputHtmlFormControl;

    if ( eventArgument == null ) {
        // If __EVENTARGUMENT is not in the HTML (injected via JavaScript?), add it manually.
        eventArgument = new InputHtmlFormControl( "<input type=\"hidden\" name=\"__EVENTARGUMENT\" id=\"__EVENTARGUMENT\" value=\"\" />" );
        Controls.Add( eventArgument );
    }

    // Most __doPostBack() examples found via Googling unescaped the event target parameter,
    // but some did not, so play it safe and see which kind we have
    string effectiveEventTarget;
    if ( RegexCache.Instance.Regex( RegexLibrary.MatchDoPostBack, RegexLibrary.MatchDoPostBackOptions ).IsMatch( Html ) ) {
        effectiveEventTarget = eventTargetValue.Replace( '$', ':' );
    }
    else {
        effectiveEventTarget = eventTargetValue;
    }

    try {
        eventTarget.Value = effectiveEventTarget;
        eventArgument.Value = eventArgumentValue;

        // Build the request using an empty string so that none of the buttons in the form are included.
        return BuildRequest( string.Empty );
    }
    finally {
        // Do not persist the values. Resetting in a finally block keeps a failed build from leaving
        // the postback values on the form's controls, where a later request would pick them up.
        eventTarget.Value = string.Empty;
        eventArgument.Value = string.Empty;
    }
}
/// <summary>
/// Creates an <b>HtmlFormDefinition</b> from a single form match: copies the attributes parsed from the form's
/// attribute group and collects every named control found in the form's body group.
/// </summary>
/// <param name="formMatch">The regular expression match for one form; its attribute and body groups are read.</param>
/// <param name="pageHtml">The HTML of the page, stored on the returned form definition.</param>
/// <returns>The populated form definition.</returns>
/// <exception cref="System.Net.WebException">A matched control is not an input, select or textarea.</exception>
private static HtmlFormDefinition PopulateForm( Match formMatch, string pageHtml ) {
    var form = new HtmlFormDefinition { PageHtml = pageHtml };

    // Copy across every attribute parsed from the form's attribute text.
    var attributeText = formMatch.Groups[RegexLibrary.ParseFormsAttributesGroup].Value;
    foreach ( var pair in Utility.ParseAttributes( attributeText ) ) {
        form.attributes.Add( pair.Key, pair.Value );
    }

    // TODO: need to remove comments from form HTML before parsing out controls; commented out controls will be found!

    // Populate controls
    var controlRegex = RegexCache.Instance.Regex( RegexLibrary.ParseFormControls, RegexLibrary.ParseFormControlsOptions );
    var bodyText = formMatch.Groups[RegexLibrary.ParseFormsBodyGroup].Value;

    foreach ( Match match in controlRegex.Matches( bodyText ) ) {
        var controlHtml = match.Value;
        var groups = match.Groups;
        HtmlFormControl parsed;

        if ( groups[RegexLibrary.ParseFormControlsInputGroup].Value.Length > 0 ) {
            // Parse as a plain input first so its type can pick the specialised class.
            var input = new InputHtmlFormControl( controlHtml );

            switch ( input.ControlType ) {
                case InputHtmlFormControlType.Radio:
                    parsed = new InputRadioHtmlFormControl( controlHtml );
                    break;

                case InputHtmlFormControlType.CheckBox:
                    parsed = new InputCheckBoxHtmlFormControl( controlHtml );
                    break;

                default:
                    // Generic control
                    parsed = input;
                    break;
            }
        }
        else if ( groups[RegexLibrary.ParseFormControlsSelectGroup].Value.Length > 0 ) {
            parsed = new SelectHtmlFormControl( controlHtml );
        }
        else if ( groups[RegexLibrary.ParseFormControlsTextAreaGroup].Value.Length > 0 ) {
            parsed = new TextAreaHtmlFormControl( controlHtml );
        }
        else {
            throw new System.Net.WebException( string.Format( CultureInfo.CurrentCulture, NScrapeResources.UnsupportedHtmlControl, controlHtml ) );
        }

        // Controls without a name are not kept.
        if ( parsed.Name == null ) {
            continue;
        }

        form.controls.Add( parsed );
    }

    return form;
}