/// <summary>
/// Scans <paramref name="html"/> for http(s) URLs that reference an .mp4 or .flv
/// resource and returns every occurrence in the order it appears.
/// </summary>
/// <param name="html">Text to scan. Null or empty yields an empty list.</param>
/// <param name="sourceUrl">
/// Forwarded, together with each matched URL, to
/// <c>RegexValidateUtil.getAbsoluteUrlFromRelative</c>.
/// </param>
/// <returns>
/// The URLs collected so far; never null. If an exception occurs while scanning,
/// whatever was gathered before the failure is returned.
/// </returns>
public static List<string> GetVideoUrl(string html, string sourceUrl)
{
    List<string> videoUrls = new List<string>();
    if (string.IsNullOrEmpty(html))
    {
        return videoUrls;
    }

    try
    {
        // The alternation must not contain blanks: without
        // RegexOptions.IgnorePatternWhitespace they are matched literally,
        // which would require ". mp4 " / " flv" in the input.
        Regex r = new Regex("https?[^\"'<>]*?[^\"' <>]+?\\.(?:mp4|flv)[^\"'<>]*");

        // Enumerate all matches once. Re-testing IsMatch/Match on the unchanged
        // input would return the first hit forever and never terminate.
        foreach (Match match in r.Matches(html))
        {
            string videoUrl = match.Value;
            if (string.IsNullOrEmpty(videoUrl))
            {
                continue;
            }

            videoUrl = RegexValidateUtil.getAbsoluteUrlFromRelative(videoUrl, sourceUrl);
            videoUrls.Add(videoUrl);
        }
    }
    catch (Exception)
    {
        // Deliberate best effort: callers receive the partial result instead of an exception.
    }

    return videoUrls;
}
/// <summary>
/// Post-processes a raw extracted value with the selector's optional regex and
/// appends the result(s) to <paramref name="props"/>.
/// </summary>
/// <remarks>
/// Without a regex the value itself is appended. With a regex, one entry is appended
/// per match: either the selector's replacement template with <c>$1</c>, <c>$2</c>, ...
/// substituted by the corresponding capture groups, or capture group 1 when no
/// template is set. Every appended entry is trimmed and HTML-encoded.
/// </remarks>
/// <param name="props">List that receives the results; it is mutated and returned.</param>
/// <param name="prop">Raw value to process. Null is treated as an empty string.</param>
/// <param name="selector">Supplies <c>regex</c> and <c>replacement</c>; must not be null.</param>
/// <param name="sourceUrl">
/// Forwarded to <c>RegexValidateUtil.getAbsoluteUrlFromRelative</c> when
/// <paramref name="isUrl"/> is true.
/// </param>
/// <param name="isUrl">Whether non-empty results are passed through the URL helper before being stored.</param>
/// <returns>The same <paramref name="props"/> instance.</returns>
public static List<string> GetPropertyAfterRegex(List<string> props, string prop, Selector selector, string sourceUrl, bool isUrl)
{
    if (prop == null)
    {
        // Avoids a null dereference in Regex.Match / Trim below.
        prop = "";
    }

    if (selector.regex == null)
    {
        if (isUrl && !string.IsNullOrEmpty(prop))
        {
            prop = RegexValidateUtil.getAbsoluteUrlFromRelative(prop, sourceUrl);
        }

        props.Add(System.Net.WebUtility.HtmlEncode(prop.Trim()));
        return props;
    }

    // Match once and walk the chain with NextMatch; the Match object keeps its own
    // reference to the input, so reusing local variables below is safe.
    Match m = new Regex(selector.regex).Match(prop);
    while (m.Success)
    {
        string value;
        if (selector.replacement != null)
        {
            value = selector.replacement;

            // Groups[0] is the whole match, so real capture groups are 1..Count-1.
            // Substitute from the highest index down so that "$1" does not clobber
            // the prefix of "$10", "$11", ...
            for (int i = m.Groups.Count - 1; i >= 1; i--)
            {
                value = value.Replace("$" + i, m.Groups[i].Value);
            }
        }
        else
        {
            // Yields "" when the pattern has no capture group 1.
            value = m.Groups[1].Value;
        }

        if (isUrl)
        {
            if (string.IsNullOrEmpty(value))
            {
                // An empty URL ends processing of the remaining matches as well.
                break;
            }

            value = RegexValidateUtil.getAbsoluteUrlFromRelative(value, sourceUrl);
        }

        props.Add(System.Net.WebUtility.HtmlEncode(value.Trim()));
        m = m.NextMatch();
    }

    return props;
}
/// <summary>
/// Extracts all values described by <paramref name="selector"/> from either an HTML
/// element or a JSON token.
/// </summary>
/// <remarks>
/// <para>
/// HTML source: elements are chosen with <c>selector.selector</c> ("this" means the
/// source element itself), one raw value is read per element according to
/// <c>selector.fun</c>, and each is passed through <see cref="GetPropertyAfterRegex"/>.
/// When <c>selector.path</c> is set, every resulting entry is then parsed as JSON and
/// replaced by the token found at that path; entries that cannot be parsed or have no
/// such token are left unchanged.
/// </para>
/// <para>
/// JSON source: tokens are chosen with <c>selector.path</c>. When
/// <c>selector.selector</c> is set, each token's text is additionally parsed as HTML and
/// the last non-empty value read from the selected elements replaces it. Non-empty
/// values other than "null" are passed through <see cref="GetPropertyAfterRegex"/>.
/// </para>
/// </remarks>
/// <param name="source">An <c>Element</c> or a <c>JToken</c>; any other type produces no values.</param>
/// <param name="selector">Extraction rules; when null no extraction is attempted.</param>
/// <param name="sourceUrl">Forwarded to the URL helper when <paramref name="isUrl"/> is true.</param>
/// <param name="isUrl">Whether extracted values are passed through <c>RegexValidateUtil.getAbsoluteUrlFromRelative</c>.</param>
/// <returns>The extracted values; a list holding a single empty string when nothing was extracted.</returns>
public static List<string> ParseSinglePropertyMatchAll(object source, Selector selector, string sourceUrl, bool isUrl)
{
    List<string> props = new List<string>();

    if (selector != null)
    {
        Element sourceElement = source as Element;
        JToken sourceToken = source as JToken;

        if (sourceElement != null)
        {
            var matched = "this".Equals(selector.selector)
                ? new Elements(sourceElement)
                : sourceElement.Select(selector.selector);

            if (matched != null)
            {
                bool doJsonParse = !string.IsNullOrEmpty(selector.path);

                foreach (var elem in matched)
                {
                    string raw = ReadElementValue(elem, selector);

                    // With a JSON path the intermediate value is JSON text, not a URL,
                    // so URL handling is postponed until after the path lookup.
                    props = GetPropertyAfterRegex(props, raw, selector, sourceUrl, isUrl && !doJsonParse);
                }

                if (doJsonParse)
                {
                    for (int i = 0; i < props.Count; i++)
                    {
                        // Guard each entry separately so one malformed value does not
                        // stop the remaining entries from being processed.
                        try
                        {
                            // GetPropertyAfterRegex HTML-encodes its output (quotes become
                            // &quot;), which is not valid JSON; undo that before parsing.
                            string json = System.Net.WebUtility.HtmlDecode(props[i]);
                            JToken token = JToken.Parse(json).SelectToken(selector.path);
                            if (token == null)
                            {
                                continue;
                            }

                            string value = token.ToString();
                            if (!string.IsNullOrEmpty(value))
                            {
                                if (isUrl)
                                {
                                    value = RegexValidateUtil.getAbsoluteUrlFromRelative(value, sourceUrl);
                                }

                                props[i] = value;
                            }
                        }
                        catch (Exception)
                        {
                            // Best effort: keep the entry as produced by the regex step.
                        }
                    }
                }
            }
        }
        else if (sourceToken != null)
        {
            List<JToken> tokens = new List<JToken>();
            try
            {
                tokens = sourceToken.SelectTokens(selector.path).ToList();
            }
            catch (Exception)
            {
                // Best effort: an unusable path simply selects nothing.
            }

            foreach (JToken item in tokens)
            {
                string prop = item.ToString();

                if (!string.IsNullOrEmpty(selector.selector))
                {
                    try
                    {
                        var elements = "this".Equals(selector.selector)
                            ? new Elements(NSoupClient.Parse(prop))
                            : NSoupClient.Parse(prop).Select(selector.selector);

                        if (elements != null)
                        {
                            foreach (var elem in elements)
                            {
                                // The last element yielding a non-empty value wins.
                                string candidate = ReadElementValue(elem, selector);
                                if (!string.IsNullOrEmpty(candidate))
                                {
                                    prop = candidate;
                                }
                            }
                        }
                    }
                    catch (Exception)
                    {
                        // Best effort: fall back to the token text.
                    }
                }

                if (!string.IsNullOrEmpty(prop) && !"null".Equals(prop.Trim()))
                {
                    props = GetPropertyAfterRegex(props, prop, selector, sourceUrl, isUrl);
                }
            }
        }
    }

    if (props.Count == 0)
    {
        props.Add("");
    }

    return props;
}

/// <summary>
/// Reads one value from <paramref name="elem"/> as directed by <c>selector.fun</c>:
/// "attr" reads the attribute named by <c>selector.param</c>, "html" reads <c>Html()</c>,
/// "text" reads <c>Text()</c>, and anything else falls back to <c>ToString()</c>.
/// </summary>
private static string ReadElementValue(Element elem, Selector selector)
{
    if ("attr".Equals(selector.fun))
    {
        return elem.Attr(selector.param);
    }

    if ("html".Equals(selector.fun))
    {
        return elem.Html();
    }

    if ("text".Equals(selector.fun))
    {
        return elem.Text();
    }

    return elem.ToString();
}