Esempio n. 1
0
 /// <summary>
 /// Applies a CSS selector to the HTML returned by <c>GetHtml()</c>.
 /// </summary>
 /// <param name="cssSelector">The CSS selector handed to <c>FfeWebAngleSharp.SelectByCssSelector</c>.</param>
 /// <returns>Whatever <c>FfeWebAngleSharp.SelectByCssSelector</c> returns for the selector and HTML.</returns>
 public string SelectByCssSelector(string cssSelector)
 {
     // Fetch the HTML first, then delegate the actual selection to the AngleSharp-based helper.
     string html = GetHtml();
     return FfeWebAngleSharp.SelectByCssSelector(cssSelector, html);
 }
Esempio n. 2
0
        /// <summary>
        /// Loads the resource at <paramref name="url"/> and extracts a value from it using the
        /// supplied selection criteria.
        /// </summary>
        /// <remarks>
        /// For every parser except <c>Parser.Newtonsoft</c> the selection order is:
        /// <paramref name="xPath"/> first, otherwise <paramref name="cssSelector"/>, otherwise the
        /// raw page source. If <paramref name="regExPattern"/> is set, it is then applied to the
        /// result of that selection (or to the raw page source when neither XPath nor CSS selector
        /// was given). <paramref name="jsonPath"/> is only evaluated when
        /// <paramref name="parser"/> is <c>Parser.Newtonsoft</c>.
        /// When debug logging is enabled, the downloaded source is additionally written to a file.
        /// </remarks>
        /// <param name="url">Address of the resource; passed to the <see cref="Uri"/> constructor.</param>
        /// <param name="xPath">Optional XPath expression. Takes precedence over <paramref name="cssSelector"/>.</param>
        /// <param name="cssSelector">Optional CSS selector, used only when <paramref name="xPath"/> is null or empty.</param>
        /// <param name="regExPattern">Optional regular expression pattern applied after the XPath/CSS selection.</param>
        /// <param name="regExGroup">Forwarded to <c>FfeRegEx.RegExByIndexAndGroup</c> (presumably the capture group to return; confirm against that helper).</param>
        /// <param name="regExMatchIndex">Forwarded to <c>FfeRegEx.RegExByIndexAndGroup</c> (presumably the index of the match to return; confirm against that helper).</param>
        /// <param name="jsonPath">JSONPath expression, used only with <c>Parser.Newtonsoft</c>.</param>
        /// <param name="parser">Parser to use. Unrecognized values fall back to the HAP parser.</param>
        /// <returns>The selected value together with the raw source it was selected from.</returns>
        /// <exception cref="ArgumentException">
        /// None of <paramref name="xPath"/>, <paramref name="cssSelector"/>,
        /// <paramref name="regExPattern"/> or <paramref name="jsonPath"/> is set (null or empty).
        /// </exception>
        /// <exception cref="RegExException">The regular expression produced a null or empty value.</exception>
        public static (string value, string rawSource) GetValueFromWeb(string url,
                                                                       string xPath        = null,
                                                                       string cssSelector  = null,
                                                                       string regExPattern = null, string regExGroup = null, int regExMatchIndex = 0,
                                                                       string jsonPath     = null,
                                                                       Parser parser       = Parser.Auto)
        {
            // Empty strings count as "not provided": the dispatch below uses String.IsNullOrEmpty,
            // so a null-only check here would let empty criteria through and silently return no value.
            if (String.IsNullOrEmpty(xPath) &&
                String.IsNullOrEmpty(cssSelector) &&
                String.IsNullOrEmpty(regExPattern) &&
                String.IsNullOrEmpty(jsonPath))
            {
                throw new ArgumentException("No selection criteria were provided. Provide at least one of the following selection criteria: xPath, cssSelector, regExPattern or jsonPath.");
            }

            Uri uri = new Uri(url);

            // JSON sources are handled separately: only jsonPath is evaluated for them.
            if (parser == Parser.Newtonsoft)
            {
                IFfeJsonParser ffeJsonParser = new FfeJsonNewtonsoft(uri);
                string jsonValue  = ffeJsonParser.SelectByJsonPath(jsonPath);
                string jsonSource = ffeJsonParser.GetJson();

                if (log.IsEnabled(Serilog.Events.LogEventLevel.Debug))
                {
                    jsonSource.WriteToFile($"JsonSource_{uri.Host}.json", parser.ToString());
                }

                return (jsonValue, jsonSource);
            }

            IFfeWebParser ffeWebParser;
            switch (parser)
            {
            case Parser.Auto:
                ffeWebParser = AutoWebParserSelection(uri, xPath, cssSelector, regExPattern);
                break;

            case Parser.AngleSharp:
                ffeWebParser = new FfeWebAngleSharp(uri);
                break;

            case Parser.HttpClient:
                ffeWebParser = new FfeWebHttpClient(uri);
                break;

            case Parser.WebClient:
                ffeWebParser = new FfeWebClient(uri);
                break;

            // HAP is both an explicit choice and the fallback for any unrecognized parser value.
            case Parser.HAP:
            default:
                ffeWebParser = new FfeWebHap(uri);
                break;
            }

            string value     = null;
            string rawSource = ffeWebParser.GetHtml();

            if (log.IsEnabled(Serilog.Events.LogEventLevel.Debug))
            {
                rawSource.WriteToFile($"PageSource_{uri.Host}.html", parser.ToString());
            }

            // Text the regular expression (if set) is applied to.
            string regExInput;
            if (!String.IsNullOrEmpty(xPath))
            {
                value      = ffeWebParser.SelectByXPath(xPath);
                regExInput = value;
            }
            else if (!String.IsNullOrEmpty(cssSelector))
            {
                value      = ffeWebParser.SelectByCssSelector(cssSelector);
                regExInput = value;
            }
            else // Select by RegEx only (HTML source code = RegEx input).
            {
                //HACK: AngleSharp does not provide the full HTML source code.
                if (parser == Parser.AngleSharp)
                {
                    log.Warning("Regular Expression with AngleSharp WebParser does not work well. HttpClient is used instead.");
                    rawSource = FfeWeb.GetHttpResponseContent(uri);
                }
                regExInput = rawSource;
            }

            if (!String.IsNullOrEmpty(regExPattern))
            {
                value = FfeRegEx.RegExByIndexAndGroup(regExInput, regExPattern, regExMatchIndex, regExGroup);

                if (String.IsNullOrEmpty(value))
                {
                    // Keep the failing input around for diagnosis when debug logging is on.
                    if (log.IsEnabled(Serilog.Events.LogEventLevel.Debug))
                    {
                        regExInput.WriteToFile("RegExInput.html", "RegEx");
                    }
                    throw new RegExException()
                          {
                              Input = regExInput, Pattern = regExPattern
                          };
                }
            }

            return (value, rawSource);
        }