/// <summary>
/// Parses crawled content as JSON and collects the image URLs listed in it.
/// </summary>
/// <param name="param">
/// Positional arguments: [0] the crawled content (JSON text), [1] not read by this
/// parser, [2] the URL the content came from (used for logging only).
/// </param>
/// <returns>
/// The non-empty "objURL" values found in the root "imgs" array (empty when that array
/// is absent), or <c>null</c> when fewer than three arguments are supplied.
/// </returns>
public List<string> ParseUrl(params object[] param)
{
    // Null is kept as the "not enough arguments" result so existing callers are unaffected.
    if (param == null || param.Length < 3)
    {
        return null;
    }

    string content = param[0].ToString();
    string url = param[2].ToString();
    MyConsole.AppendLine(string.Format("开始解析Url:{0}的内容", url));

    List<string> urls = new List<string>();

    // Malformed JSON still throws from DeserializeObject. A well-formed document that
    // simply has no usable "imgs" array is treated as "no images" instead of crashing
    // with a NullReferenceException / InvalidCastException.
    JObject objRoot = JsonConvert.DeserializeObject(content) as JObject;
    JArray imgs = objRoot == null ? null : objRoot["imgs"] as JArray;
    if (imgs != null)
    {
        foreach (JToken entry in imgs)
        {
            JObject img = entry as JObject;
            if (img == null)
            {
                continue;
            }

            // Example value: http://hibiadu....../1.jpg
            // Entries without an "objURL" (e.g. an empty object) yield null and are skipped
            // so that callers never receive null or empty URLs.
            string objUrl = (string)img["objURL"];
            if (!string.IsNullOrEmpty(objUrl))
            {
                urls.Add(objUrl);
            }
        }
    }

    MyConsole.AppendLine(string.Format("找到{0}个图片..", urls.Count));
    return urls;
}
/// <summary>
/// Ensures the output window exists and is shown in its normal state, then appends
/// the message to the console log. This is best-effort: failures are reported to the
/// debug output and never propagate to the caller.
/// </summary>
/// <param name="content">The log message to append.</param>
public static void AppendLog(string content)
{
    try
    {
        // (Re)create the output window if it was never created or has been disposed.
        if (Form == null || Form.IsDisposed)
        {
            Form = new FormOutput();
            Form.BindConsole();
        }

        // Note: Form.Activate() was disabled in the original code and stays disabled.
        Action showForm = () =>
        {
            Form.Show();
            Form.WindowState = FormWindowState.Normal;
        };

        // Marshal to the thread that owns the window when called from another thread.
        if (Form.InvokeRequired)
        {
            Form.Invoke(showForm);
        }
        else
        {
            showForm();
        }

        MyConsole.AppendLine(content);
    }
    catch (Exception ex)
    {
        // Logging must never break the caller, but the failure should not vanish silently.
        System.Diagnostics.Debug.WriteLine(string.Format("AppendLog failed: {0}", ex));
    }
}
/// <summary>
/// Parses crawled HTML content: collects every anchor <c>href</c> value and every
/// <c>&lt;img&gt;</c> <c>src</c> value, and saves any text matched by the configured
/// extraction rules.
/// </summary>
/// <param name="param">
/// Positional arguments: [0] the crawled content, [1] the base folder passed to
/// <c>ContentManger.Save</c>, [2] the URL the content came from (used for logging only).
/// </param>
/// <returns>
/// The href values followed by the image src values, or <c>null</c> when fewer than
/// three arguments are supplied.
/// </returns>
public List<string> ParseUrl(params object[] param)
{
    // Null is kept as the "not enough arguments" result so existing callers are unaffected.
    if (param == null || param.Length < 3)
    {
        return null;
    }

    string content = param[0].ToString();
    string baseForlder = param[1].ToString();
    string url = param[2].ToString();
    MyConsole.AppendLine(string.Format("开始解析Url:{0}的内容", url));

    List<string> urls = new List<string>();

    // The static Regex.Matches overloads reuse cached regex objects, so the fixed patterns
    // are not re-parsed (and, for the compiled one, re-compiled) on every call.
    // Matches already returns an empty collection when nothing matches, so no IsMatch
    // pre-check (which scanned the content a second time) is needed.
    MatchCollection anchors = Regex.Matches(
        content,
        "href\\s*=\\s*(?:\"(?<1>[^\"]*)\"|(?<1>\\S+))",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);
    foreach (Match anchor in anchors)
    {
        urls.Add(anchor.Groups[1].Value);
    }

    MyConsole.AppendLine(string.Format("找到{0}个锚点..", urls.Count));

    MatchCollection images = Regex.Matches(
        content,
        @"(?i)<img[^>]*?\ssrc\s*=\s*(['""]?)(?<src>[^'""\s>]+)\1[^>]*>");
    foreach (Match image in images)
    {
        urls.Add(image.Groups["src"].Value);
    }

    MyConsole.AppendLine(string.Format("找到{0}个图片..", images.Count));

    // Save the text captured by each configured extraction rule.
    List<ParseModel> parseModels = RegexCondition as List<ParseModel>;
    if (parseModels != null && parseModels.Count > 0)
    {
        foreach (var item in parseModels)
        {
            // Caller-supplied patterns keep using an instance Regex so they do not evict
            // the fixed patterns above from the static regex cache.
            Regex rule = new Regex(item.RegexString);
            foreach (Match match in rule.Matches(content))
            {
                // NOTE(review): Encoding.Default depends on the machine/runtime; confirm
                // that whatever reads these files expects that encoding.
                ContentManger.Save(
                    baseForlder,
                    Encoding.Default.GetBytes(match.Value),
                    item.SaveType,
                    Guid.NewGuid().ToString() + ".txt");
                _main.DownloadFileCount++;
            }
        }
    }

    // Return the newly discovered URLs.
    return urls;
}