Beispiel #1
0
        /// <summary>
        /// Creates an <see cref="HtmlFormDefinition"/> from a single form match, copying the form's
        /// attributes and the named controls found in its body.
        /// </summary>
        /// <param name="formMatch">The match for one form; its attributes and body capture groups are read.</param>
        /// <param name="pageHtml">The HTML assigned to the definition's <c>PageHtml</c> property.</param>
        /// <returns>The populated form definition.</returns>
        /// <exception cref="System.Net.WebException">
        /// A control match has no text in any of the input, select, or textarea capture groups.
        /// </exception>
        private static HtmlFormDefinition PopulateForm(Match formMatch, string pageHtml)
        {
            var form = new HtmlFormDefinition();
            form.PageHtml = pageHtml;

            // Copy every attribute parsed from the form's opening tag
            var attributeText = formMatch.Groups[RegexLibrary.ParseFormsAttributesGroup].Value;

            foreach (var pair in Utility.ParseAttributes(attributeText))
            {
                form.attributes.Add(pair.Key, pair.Value);
            }

            // TODO: need to remove comments from form HTML before parsing out controls; commented out controls will be found!

            // Find the controls inside the form body
            var controlRegex = RegexCache.Instance.Regex(RegexLibrary.ParseFormControls, RegexLibrary.ParseFormControlsOptions);
            var formBody = formMatch.Groups[RegexLibrary.ParseFormsBodyGroup].Value;

            foreach (Match match in controlRegex.Matches(formBody))
            {
                var groups = match.Groups;
                HtmlFormControl control;

                if (groups[RegexLibrary.ParseFormControlsInputGroup].Value.Length != 0)
                {
                    // The input is parsed first so that its type can select a more specific class
                    var input = new InputHtmlFormControl(match.Value);

                    switch (input.ControlType)
                    {
                        case InputHtmlFormControlType.Radio:
                            control = new InputRadioHtmlFormControl(match.Value);
                            break;

                        case InputHtmlFormControlType.CheckBox:
                            control = new InputCheckBoxHtmlFormControl(match.Value);
                            break;

                        default:
                            // Any other input type keeps the generic control
                            control = input;
                            break;
                    }
                }
                else if (groups[RegexLibrary.ParseFormControlsSelectGroup].Value.Length != 0)
                {
                    control = new SelectHtmlFormControl(match.Value);
                }
                else if (groups[RegexLibrary.ParseFormControlsTextAreaGroup].Value.Length != 0)
                {
                    control = new TextAreaHtmlFormControl(match.Value);
                }
                else
                {
                    var message = string.Format(CultureInfo.CurrentCulture, NScrapeResources.UnsupportedHtmlControl, match.Value);
                    throw new System.Net.WebException(message);
                }

                // Controls without a name are not added to the form
                if (control.Name == null)
                {
                    continue;
                }

                form.controls.Add(control);
            }

            return form;
        }
Beispiel #2
0
        /// <summary>
        /// Builds the request data to be used to submit an ASPX form.
        /// </summary>
        /// <param name="eventTargetValue">Contains the value for <b>__EVENTTARGET</b>, the control doing the submission.</param>
        /// <param name="eventArgumentValue">Contains the value for <b>__EVENTARGUMENT</b>, any additional information.</param>
        /// <returns>The request data in <b>application/x-www-form-urlencoded</b> format.</returns>
        /// <remarks>
        /// <para>
        /// See <see href="http://www.evagoras.com/2011/02/10/how-postback-works-in-asp-net/">How postback works in ASP.NET</see> for a good overview on the topic.
        /// </para>
        /// <para>
        /// If the form has no <b>__EVENTTARGET</b> or <b>__EVENTARGUMENT</b> input, a hidden one is created and
        /// added to <c>Controls</c>, where it remains after the call. The values of both inputs are reset to
        /// an empty string before this method exits, even when building the request throws.
        /// </para>
        /// </remarks>
        protected string BuildAspxPostBackRequest(string eventTargetValue, string eventArgumentValue)
        {
            var eventTarget = Controls.SingleOrDefault(c => c.Name == EventTargetName) as InputHtmlFormControl;

            if (eventTarget == null)
            {
                // If __EVENTTARGET is not in the HTML (injected via JavaScript?), add it manually.
                eventTarget = new InputHtmlFormControl("<input type=\"hidden\" name=\"__EVENTTARGET\" id=\"__EVENTTARGET\" value=\"\" />");
                Controls.Add(eventTarget);
            }

            var eventArgument = Controls.SingleOrDefault(c => c.Name == EventArgumentName) as InputHtmlFormControl;

            if (eventArgument == null)
            {
                // If __EVENTARGUMENT is not in the HTML (injected via JavaScript?), add it manually.
                eventArgument = new InputHtmlFormControl("<input type=\"hidden\" name=\"__EVENTARGUMENT\" id=\"__EVENTARGUMENT\" value=\"\" />");
                Controls.Add(eventArgument);
            }

            try
            {
                // Most __doPostBack() examples found via Googling unescaped the event target parameter,
                // but some did not, so play it safe and see which kind we have.
                // A null target is passed through untouched on both paths instead of failing on Replace().
                if (eventTargetValue != null
                    && RegexCache.Instance.Regex(RegexLibrary.MatchDoPostBack, RegexLibrary.MatchDoPostBackOptions).IsMatch(Html))
                {
                    eventTarget.Value = eventTargetValue.Replace('$', ':');
                }
                else
                {
                    eventTarget.Value = eventTargetValue;
                }

                eventArgument.Value = eventArgumentValue;

                // Build the request using an empty string so that none of the buttons in the form are included.
                return BuildRequest(string.Empty);
            }
            finally
            {
                // Do not persist the values, even if building the request failed;
                // the controls are shared with every later request built from this form.
                eventTarget.Value   = string.Empty;
                eventArgument.Value = string.Empty;
            }
        }
Beispiel #3
0
        /// <summary>
        /// Builds the request data to be used to submit an ASPX form.
        /// </summary>
        /// <param name="eventTargetValue">Contains the value for <b>__EVENTTARGET</b>, the control doing the submission.</param>
        /// <param name="eventArgumentValue">Contains the value for <b>__EVENTARGUMENT</b>, any additional information.</param>
        /// <returns>The request data in <b>application/x-www-form-urlencoded</b> format.</returns>
        /// <remarks>
        /// <para>
        /// See <see href="http://www.evagoras.com/2011/02/10/how-postback-works-in-asp-net/">How postback works in ASP.NET</see> for a good overview on the topic.
        /// </para>
        /// <para>
        /// If the form has no <b>__EVENTTARGET</b> or <b>__EVENTARGUMENT</b> input, a hidden one is created and
        /// added to <c>Controls</c>, where it remains after the call. The values of both inputs are reset to
        /// an empty string before this method exits, even when building the request throws.
        /// </para>
        /// </remarks>
        protected string BuildAspxPostBackRequest( string eventTargetValue, string eventArgumentValue )
        {
            var eventTarget = Controls.SingleOrDefault( c => c.Name == EventTargetName ) as InputHtmlFormControl;
            if ( eventTarget == null ) {
                // If __EVENTTARGET is not in the HTML (injected via JavaScript?), add it manually.
                eventTarget = new InputHtmlFormControl( "<input type=\"hidden\" name=\"__EVENTTARGET\" id=\"__EVENTTARGET\" value=\"\" />" );
                Controls.Add( eventTarget );
            }

            var eventArgument = Controls.SingleOrDefault( c => c.Name == EventArgumentName ) as InputHtmlFormControl;
            if ( eventArgument == null ) {
                // If __EVENTARGUMENT is not in the HTML (injected via JavaScript?), add it manually.
                eventArgument = new InputHtmlFormControl( "<input type=\"hidden\" name=\"__EVENTARGUMENT\" id=\"__EVENTARGUMENT\" value=\"\" />" );
                Controls.Add( eventArgument );
            }

            try {
                // Most __doPostBack() examples found via Googling unescaped the event target parameter,
                // but some did not, so play it safe and see which kind we have.
                // A null target is passed through untouched on both paths instead of failing on Replace().
                if ( eventTargetValue != null
                     && RegexCache.Instance.Regex( RegexLibrary.MatchDoPostBack, RegexLibrary.MatchDoPostBackOptions ).IsMatch( Html ) ) {
                    eventTarget.Value = eventTargetValue.Replace( '$', ':' );
                }
                else {
                    eventTarget.Value = eventTargetValue;
                }

                eventArgument.Value = eventArgumentValue;

                // Build the request using an empty string so that none of the buttons in the form are included.
                return BuildRequest( string.Empty );
            }
            finally {
                // Do not persist the values, even if building the request failed;
                // the controls are shared with every later request built from this form.
                eventTarget.Value = string.Empty;
                eventArgument.Value = string.Empty;
            }
        }
        /// <summary>
        /// Turns one form match into an <see cref="HtmlFormDefinition"/> holding the form's attributes
        /// and every named control found in the form body.
        /// </summary>
        /// <param name="formMatch">The match for one form; its attributes and body capture groups are read.</param>
        /// <param name="pageHtml">The HTML assigned to the definition's <c>PageHtml</c> property.</param>
        /// <returns>The populated form definition.</returns>
        /// <exception cref="System.Net.WebException">
        /// A control match has no text in any of the input, select, or textarea capture groups.
        /// </exception>
        private static HtmlFormDefinition PopulateForm( Match formMatch, string pageHtml )
        {
            var definition = new HtmlFormDefinition();
            definition.PageHtml = pageHtml;

            // Attributes of the form's opening tag
            var rawAttributes = formMatch.Groups[RegexLibrary.ParseFormsAttributesGroup].Value;
            foreach ( var entry in Utility.ParseAttributes( rawAttributes ) ) {
                definition.attributes.Add( entry.Key, entry.Value );
            }

            // TODO: need to remove comments from form HTML before parsing out controls; commented out controls will be found!

            // Controls contained in the form body
            var controlPattern = RegexCache.Instance.Regex( RegexLibrary.ParseFormControls, RegexLibrary.ParseFormControlsOptions );
            var bodyHtml = formMatch.Groups[RegexLibrary.ParseFormsBodyGroup].Value;

            foreach ( Match found in controlPattern.Matches( bodyHtml ) ) {
                HtmlFormControl parsed;

                if ( found.Groups[RegexLibrary.ParseFormControlsInputGroup].Value.Length != 0 ) {
                    // Parse as a generic input first; its type decides whether a specialised class is used
                    var genericInput = new InputHtmlFormControl( found.Value );
                    var inputType = genericInput.ControlType;

                    if ( inputType == InputHtmlFormControlType.CheckBox ) {
                        parsed = new InputCheckBoxHtmlFormControl( found.Value );
                    }
                    else if ( inputType == InputHtmlFormControlType.Radio ) {
                        parsed = new InputRadioHtmlFormControl( found.Value );
                    }
                    else {
                        parsed = genericInput;
                    }
                }
                else if ( found.Groups[RegexLibrary.ParseFormControlsSelectGroup].Value.Length != 0 ) {
                    parsed = new SelectHtmlFormControl( found.Value );
                }
                else if ( found.Groups[RegexLibrary.ParseFormControlsTextAreaGroup].Value.Length != 0 ) {
                    parsed = new TextAreaHtmlFormControl( found.Value );
                }
                else {
                    var message = string.Format( CultureInfo.CurrentCulture, NScrapeResources.UnsupportedHtmlControl, found.Value );
                    throw new System.Net.WebException( message );
                }

                // Nameless controls are left out of the form
                if ( parsed.Name == null ) {
                    continue;
                }

                definition.controls.Add( parsed );
            }

            return definition;
        }