コード例 #1
0
        /// <summary>
        /// Scans a block of HTML for http(s) URLs that point at <c>.mp4</c> or <c>.flv</c> files.
        /// </summary>
        /// <param name="html">Markup to scan. A null or empty value yields an empty list.</param>
        /// <param name="sourceUrl">
        /// Passed, together with each hit, to <c>RegexValidateUtil.getAbsoluteUrlFromRelative</c>
        /// (presumably the URL of the page the markup came from - confirm against that helper).
        /// </param>
        /// <returns>
        /// One entry per regex match, in the order the matches occur in <paramref name="html"/>.
        /// Duplicates are kept. Never null.
        /// </returns>
        public static List<string> GetVideoUrl(string html, string sourceUrl)
        {
            List<string> videoUrls = new List<string>();

            if (string.IsNullOrEmpty(html))
            {
                return(videoUrls);
            }

            try
            {
                // The alternation must not contain spaces: without RegexOptions.IgnorePatternWhitespace
                // "(?: mp4 | flv)" only matches the literal text " mp4 " / " flv", which never occurs in a URL.
                Regex r = new Regex("https?[^\"'<>]*?[^\"' <>]+?\\.(?:mp4|flv)[^\"'<>]*");

                // Visit every match exactly once. Re-running IsMatch/Match on the unchanged input
                // would return the first match forever and never terminate.
                foreach (Match m in r.Matches(html))
                {
                    string videoUrl = m.Value;
                    if (string.IsNullOrEmpty(videoUrl))
                    {
                        continue;
                    }
                    videoUrl = RegexValidateUtil.getAbsoluteUrlFromRelative(videoUrl, sourceUrl);
                    videoUrls.Add(videoUrl);
                }
            }
            catch (Exception)
            {
                // Deliberate best effort: a failure while resolving a URL stops the scan
                // and the caller receives whatever was collected up to that point.
            }
            return(videoUrls);
        }
コード例 #2
0
        /// <summary>
        /// Post-processes one extracted value and appends the result(s) to <paramref name="props"/>.
        /// </summary>
        /// <remarks>
        /// <para>
        /// When <c>selector.regex</c> is null the value itself is appended (one entry).
        /// </para>
        /// <para>
        /// Otherwise one entry is appended per regex match. Each entry is either capture group 1,
        /// or, when <c>selector.replacement</c> is set, the replacement template with every
        /// <c>$n</c> placeholder substituted by the text of capture group <c>n</c>.
        /// If the regex does not match, nothing is appended.
        /// </para>
        /// <para>
        /// Every appended entry is trimmed and HTML-encoded.
        /// </para>
        /// </remarks>
        /// <param name="props">List that receives the results; it is mutated and also returned.</param>
        /// <param name="prop">Raw value to process. Null is treated as an empty string.</param>
        /// <param name="selector">Supplies <c>regex</c> and <c>replacement</c>. Must not be null.</param>
        /// <param name="sourceUrl">Passed to <c>RegexValidateUtil.getAbsoluteUrlFromRelative</c> when <paramref name="isUrl"/> is true.</param>
        /// <param name="isUrl">
        /// When true, each non-empty result is run through <c>RegexValidateUtil.getAbsoluteUrlFromRelative</c>
        /// before being appended; in the regex path an empty result stops processing of further matches.
        /// </param>
        /// <returns>The same list instance that was passed in as <paramref name="props"/>.</returns>
        public static List<string> GetPropertyAfterRegex(List<string> props, string prop, Selector selector, string sourceUrl, bool isUrl)
        {
            // A missing value used to surface as an ArgumentNullException / NullReferenceException
            // further down; handle it as "no text" instead.
            if (prop == null)
            {
                prop = "";
            }

            if (selector.regex == null)
            {
                if (isUrl && !string.IsNullOrEmpty(prop))
                {
                    prop = RegexValidateUtil.getAbsoluteUrlFromRelative(prop, sourceUrl);
                }
                props.Add(System.Net.WebUtility.HtmlEncode(prop.Trim()));
                return(props);
            }

            var r = new Regex(selector.regex);

            // Evaluate the pattern once and walk the matches; the input string is never modified,
            // so NextMatch always continues on the original text.
            for (Match m = r.Match(prop); m.Success; m = m.NextMatch())
            {
                string value;
                if (selector.replacement != null)
                {
                    value = selector.replacement;

                    // Substitute the highest group number first so that "$1" cannot eat the
                    // prefix of "$10", "$11", ... when the pattern has ten or more groups.
                    // The loop starts at Groups.Count (one past the last real group): that index
                    // yields an empty group, so such a placeholder is blanked, exactly as before.
                    for (int i = m.Groups.Count; i >= 1; i--)
                    {
                        value = value.Replace("$" + i, m.Groups[i].Value);
                    }
                }
                else
                {
                    value = m.Groups[1].Value;
                }

                if (isUrl)
                {
                    if (string.IsNullOrEmpty(value))
                    {
                        // An empty URL ends the scan; later matches are not examined.
                        break;
                    }
                    value = RegexValidateUtil.getAbsoluteUrlFromRelative(value, sourceUrl);
                }
                props.Add(System.Net.WebUtility.HtmlEncode(value.Trim()));
            }

            return(props);
        }
コード例 #3
0
        /// <summary>
        /// Extracts every value described by <paramref name="selector"/> from an HTML element
        /// or a JSON token.
        /// </summary>
        /// <remarks>
        /// <para>
        /// <b>Element source:</b> the elements matched by <c>selector.selector</c> (or the element
        /// itself when the selector is the literal <c>"this"</c>) are read according to
        /// <c>selector.fun</c> and passed through <see cref="GetPropertyAfterRegex"/>. If
        /// <c>selector.path</c> is set, each resulting string is then parsed as JSON and replaced
        /// by the token found at that path; entries that cannot be parsed or whose path yields
        /// nothing are left as they were.
        /// </para>
        /// <para>
        /// <b>JToken source:</b> every token matched by <c>selector.path</c> is converted to a string.
        /// If <c>selector.selector</c> is set, that string is parsed as HTML and replaced by the value
        /// of the last matching element that produced non-empty text. The result is passed through
        /// <see cref="GetPropertyAfterRegex"/>.
        /// </para>
        /// <para>
        /// Any other source type (including null), or a null <paramref name="selector"/>, produces
        /// the single-entry fallback list.
        /// </para>
        /// </remarks>
        /// <param name="source">An <c>Element</c> or a <c>JToken</c>.</param>
        /// <param name="selector">Extraction rules (<c>selector</c>, <c>fun</c>, <c>param</c>, <c>path</c>, <c>regex</c>, <c>replacement</c>).</param>
        /// <param name="sourceUrl">Passed to <c>RegexValidateUtil.getAbsoluteUrlFromRelative</c> when <paramref name="isUrl"/> is true.</param>
        /// <param name="isUrl">Whether extracted values are URLs that should be run through <c>RegexValidateUtil.getAbsoluteUrlFromRelative</c>.</param>
        /// <returns>The extracted values; a list containing one empty string when nothing was extracted. Never null or empty.</returns>
        public static List<string> ParseSinglePropertyMatchAll(object source, Selector selector, string sourceUrl, bool isUrl)
        {
            List<string> props = new List<string>();

            if (selector != null)
            {
                Element rootElement = source as Element;
                JToken  rootToken   = source as JToken;

                if (rootElement != null)
                {
                    var matched = ("this".Equals(selector.selector)) ? new Elements(rootElement) : rootElement.Select(selector.selector);
                    if (matched != null)
                    {
                        bool doJsonParse = !string.IsNullOrEmpty(selector.path);

                        foreach (var elem in matched)
                        {
                            string prop = GetElementValueBySelectorFun(elem, selector);

                            // When the text is JSON, URL resolution has to wait until the
                            // real value has been pulled out of it below.
                            props = GetPropertyAfterRegex(props, prop, selector, sourceUrl, isUrl && !doJsonParse);
                        }

                        if (doJsonParse)
                        {
                            for (int i = 0; i < props.Count; i++)
                            {
                                // Handled per entry so that one unparsable string does not
                                // prevent the remaining entries from being processed.
                                try
                                {
                                    // GetPropertyAfterRegex HTML-encodes its output (quotes become
                                    // "&quot;"), which is not valid JSON; undo that before parsing.
                                    string json   = System.Net.WebUtility.HtmlDecode(props[i]);
                                    JToken picked = JToken.Parse(json).SelectToken(selector.path);
                                    if (picked == null)
                                    {
                                        continue;
                                    }

                                    string value = picked.ToString();
                                    if (string.IsNullOrEmpty(value))
                                    {
                                        continue;
                                    }

                                    if (isUrl)
                                    {
                                        value = RegexValidateUtil.getAbsoluteUrlFromRelative(value, sourceUrl);
                                    }
                                    props[i] = value;
                                }
                                catch (Exception)
                                {
                                    // Best effort: keep the entry as produced by GetPropertyAfterRegex.
                                }
                            }
                        }
                    }
                }
                else if (rootToken != null)
                {
                    List<JToken> tokens = new List<JToken>();

                    try
                    {
                        tokens = rootToken.SelectTokens(selector.path).ToList();
                    }
                    catch (Exception)
                    {
                        // Best effort: a path that cannot be evaluated simply selects nothing.
                    }

                    foreach (JToken item in tokens)
                    {
                        string prop = item.ToString();

                        if (!string.IsNullOrEmpty(selector.selector))
                        {
                            try
                            {
                                var document = NSoupClient.Parse(prop);
                                var elements = ("this".Equals(selector.selector)) ? new Elements(document) : document.Select(selector.selector);
                                if (elements != null)
                                {
                                    foreach (var elem in elements)
                                    {
                                        // The last element that yields non-empty text wins.
                                        string newProp = GetElementValueBySelectorFun(elem, selector);
                                        if (!string.IsNullOrEmpty(newProp))
                                        {
                                            prop = newProp;
                                        }
                                    }
                                }
                            }
                            catch (Exception)
                            {
                                // Best effort: fall back to the raw token text.
                            }
                        }

                        // Skip empty values and the textual "null".
                        if (!string.IsNullOrEmpty(prop) && !"null".Equals(prop.Trim()))
                        {
                            props = GetPropertyAfterRegex(props, prop, selector, sourceUrl, isUrl);
                        }
                    }
                }
            }

            // Callers always get at least one entry.
            if (props.Count == 0)
            {
                props.Add("");
            }
            return(props);
        }

        /// <summary>
        /// Reads one value from <paramref name="elem"/> as directed by <c>selector.fun</c>:
        /// "attr" reads the attribute named by <c>selector.param</c>, "html" calls <c>Html()</c>,
        /// "text" calls <c>Text()</c>, and anything else calls <c>ToString()</c>.
        /// </summary>
        private static string GetElementValueBySelectorFun(Element elem, Selector selector)
        {
            if ("attr".Equals(selector.fun))
            {
                return(elem.Attr(selector.param));
            }
            if ("html".Equals(selector.fun))
            {
                return(elem.Html());
            }
            if ("text".Equals(selector.fun))
            {
                return(elem.Text());
            }
            return(elem.ToString());
        }