Esempio n. 1
0
        private static Uri FindPdfLink(HtmlNode scihub)
        {
            var options = scihub.DescendantsAndSelf("a")
                          .Select(x => x.TryGetLinkUrl())
                          .Where(x => x != null && HttpUtils.IsHttp(x) && x.AbsolutePath.EndsWith(".pdf"))
                          .GroupBy(x => x)
                          .ToList();

            if (options.Count <= 1)
            {
                return(options.FirstOrDefault()?.Key);
            }
            else
            {
                return(options.OrderByDescending(x => x.Key.AbsoluteUri.Length).First().Key);
            }
        }
        // Supported formats:
        // a=1&b=c    (isUnprefixedExtraParameters)
        // §a=1&b=c
        // .link-next
        // .link-next§§preserve
        // .link-next (alwaysPreserveRemainingParameters)
        // .link-next§§preserve§§a={z}

        public static bool UpdateNextLink(ref LazyUri modifiableUrl, HtmlNode node, string rule, bool isUnprefixedExtraParameters = false, bool alwaysPreserveRemainingParameters = false)
        {
            var  anyVarying = false;
            bool preserve   = alwaysPreserveRemainingParameters;

            if (!isUnprefixedExtraParameters)
            {
                string additionalChanges = null;
                if (!rule.StartsWith("§"))
                {
                    if (rule.Contains("§§preserve"))
                    {
                        preserve = true;
                        rule     = rule.Replace("§§preserve", string.Empty);
                    }
                    if (rule.Contains("§§"))
                    {
                        additionalChanges = rule.CaptureAfter("§§");
                        rule = rule.CaptureBefore("§§");
                    }
                    var nextlink = node.FindSingle(rule);
                    if (nextlink == null)
                    {
                        modifiableUrl = null; return(false);
                    }

                    var url = nextlink.TryGetLinkUrl();
                    if (url == null)
                    {
                        url = nextlink?.TryGetValue()?.AsUri();
                    }
                    if (!HttpUtils.IsHttp(url))
                    {
                        modifiableUrl = null; return(false);
                    }
                    if (!string.IsNullOrEmpty(url.Fragment))
                    {
                        url = url.GetLeftPart_UriPartial_Query().AsUri();
                    }

                    var defaults = preserve ? modifiableUrl.QueryParameters.Concat(modifiableUrl.FragmentParameters).ToList() : null;
                    modifiableUrl = new LazyUri(url);
                    if (defaults != null)
                    {
                        foreach (var kv in defaults)
                        {
                            if (kv.Key.StartsWith("$json-query-") && modifiableUrl.GetQueryParameter(kv.Key.CaptureBetween("-query-", "-")) != null)
                            {
                                continue;
                            }
                            if (modifiableUrl.GetQueryParameter(kv.Key) == null && modifiableUrl.GetFragmentParameter(kv.Key) == null)
                            {
                                if (kv.Key.StartsWith("$"))
                                {
                                    modifiableUrl.AppendFragmentParameter(kv.Key, kv.Value);
                                }
                                else
                                {
                                    modifiableUrl.AppendQueryParameter(kv.Key, kv.Value);
                                }
                            }
                        }
                    }
                    anyVarying = true;
                    if (additionalChanges == null)
                    {
                        return(anyVarying);
                    }
                }

                if (additionalChanges != null)
                {
                    rule = additionalChanges;
                }
                else
                {
                    rule = rule.Substring(1);
                }
            }



            var z = HttpUtils.GetParameters(rule);

            foreach (var kv in z)
            {
                var val = kv.Value;
                var key = kv.Key;
                if (key.StartsWith("£"))
                {
                    key = "$" + key.Substring(1);
                }

                if (val == "{delete}")
                {
                    if (key.StartsWith("$"))
                    {
                        modifiableUrl.RemoveFragmentParameter(key);
                    }
                    else
                    {
                        modifiableUrl.RemoveQueryParameter(key);
                    }
                    continue;
                }
                if (val.StartsWith("{") && val.EndsWith("}"))
                {
                    val = val.Substring(1, val.Length - 2);
                    var optional       = false;
                    var leaveUnchanged = false;
                    if (val.StartsWith("optional:"))
                    {
                        optional = true; val = val.CaptureAfter(":");
                    }
                    if (val.StartsWith("unchanged:"))
                    {
                        leaveUnchanged = true; val = val.CaptureAfter(":");
                    }
                    var v = node.TryGetValue(val);
                    anyVarying = true;
                    if (v == null)
                    {
                        if (leaveUnchanged)
                        {
                            continue;
                        }
                        if (optional)
                        {
                            if (key.StartsWith("$"))
                            {
                                modifiableUrl.RemoveFragmentParameter(key);
                            }
                            else
                            {
                                modifiableUrl.RemoveQueryParameter(key);
                            }
                            continue;
                        }
                        modifiableUrl = null;
                        return(anyVarying);
                    }
                    val = v;
                }


                if (key.StartsWith("$"))
                {
                    modifiableUrl.AppendFragmentParameter(key, val);
                }
                else
                {
                    modifiableUrl.AppendQueryParameter(key, val);
                }
            }

            return(anyVarying);
        }