Beispiel #1
0
        /// <summary>
        /// Parses a crawled JSON response and collects the image URLs it lists.
        /// </summary>
        /// <remarks>
        /// The root JSON object is expected to carry an <c>imgs</c> array whose entries are
        /// objects with a string <c>objURL</c> property. Entries that are not objects, or that
        /// have no string <c>objURL</c>, are skipped instead of producing <c>null</c> items or
        /// exceptions. Text that is not valid JSON, or whose root is not a JSON object, still
        /// throws, exactly as before.
        /// </remarks>
        /// <param name="param">
        /// Positional arguments: [0] the response body (JSON text), [1] the base folder
        /// (not used by this parser), [2] the URL the content came from (only used for logging).
        /// </param>
        /// <returns>
        /// The image URLs that were found (possibly empty), or <c>null</c> when fewer than
        /// three arguments are supplied.
        /// </returns>
        public List<string> ParseUrl(params object[] param)
        {
            if (param == null || param.Length < 3)
            {
                return null;
            }

            string content = param[0].ToString();
            string url     = param[2].ToString();

            MyConsole.AppendLine(string.Format("开始解析Url:{0}的内容", url));
            List<string> urls = new List<string>();

            JObject objRoot = (JObject)JsonConvert.DeserializeObject(content);

            // An empty body deserializes to null, and a response without an "imgs" array
            // simply contains no images; neither case should crash the crawl.
            JArray imgs = objRoot == null ? null : objRoot["imgs"] as JArray;
            if (imgs != null)
            {
                foreach (JToken entry in imgs)
                {
                    JObject img = entry as JObject;
                    if (img == null)
                    {
                        continue;
                    }

                    // e.g. http://hibiadu....../1.jpg
                    JToken objUrlToken = img["objURL"];
                    if (objUrlToken == null || objUrlToken.Type != JTokenType.String)
                    {
                        // Placeholder entries (such as an empty object) carry no URL.
                        continue;
                    }

                    string objUrl = (string)objUrlToken;
                    if (objUrl.Length > 0)
                    {
                        urls.Add(objUrl);
                    }
                }
            }

            MyConsole.AppendLine(string.Format("找到{0}个图片..", urls.Count));
            return urls;
        }
Beispiel #2
0
 /// <summary>
 /// Appends a log message to the output console, first making sure the output window
 /// exists and is shown in its normal (non-minimized) state.
 /// </summary>
 /// <remarks>
 /// A new <c>FormOutput</c> is created and bound when the current one is missing or
 /// disposed. When the call arrives on a thread other than the one that owns the window,
 /// the show/restore step is marshalled through <c>Invoke</c>. Logging is best-effort:
 /// a failure never propagates to the caller, but it is written to the trace listeners
 /// so that it can be diagnosed.
 /// </remarks>
 /// <param name="content">The log message to append.</param>
 public static void AppendLog(string content)
 {
     try
     {
         if (Form == null || Form.IsDisposed)
         {
             Form = new FormOutput();
             Form.BindConsole();
         }

         // Single definition of the "make the window visible" step, shared by both
         // the direct and the marshalled path.
         Action showWindow = () =>
         {
             Form.Show();
             Form.WindowState = FormWindowState.Normal;
         };

         if (Form.InvokeRequired)
         {
             Form.Invoke(showWindow);
         }
         else
         {
             showWindow();
         }

         MyConsole.AppendLine(content);
     }
     catch (Exception ex)
     {
         // Logging must not take the caller down, but the failure should leave a trace.
         System.Diagnostics.Trace.WriteLine("AppendLog failed: " + ex);
     }
 }
        /// <summary>
        /// Matches <c>href</c> attribute values that are double-quoted, single-quoted or
        /// unquoted; the value (without quotes) is captured in group 1.
        /// Built once because compiling the pattern on every call is expensive.
        /// </summary>
        private static readonly Regex HrefAttributeRegex = new Regex(
            "href\\s*=\\s*(?:\"(?<1>[^\"]*)\"|'(?<1>[^']*)'|(?<1>[^>\\s]+))",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        /// <summary>
        /// Matches <c>&lt;img&gt;</c> tags and captures the <c>src</c> attribute value in the
        /// named group <c>src</c>.
        /// </summary>
        private static readonly Regex ImgSrcRegex = new Regex(
            @"(?i)<img[^>]*?\ssrc\s*=\s*(['""]?)(?<src>[^'""\s>]+)\1[^>]*>");

        /// <summary>
        /// Parses crawled HTML content: collects the link and image URLs it contains and
        /// saves every text fragment matched by the configured extraction patterns.
        /// </summary>
        /// <remarks>
        /// Anchor targets come from <c>href</c> attributes (empty values are ignored) and
        /// image URLs from <c>&lt;img src&gt;</c>. When <c>RegexCondition</c> is a
        /// <c>List&lt;ParseModel&gt;</c>, each usable pattern is applied to the content; every
        /// match is handed to <c>ContentManger.Save</c> under a GUID-based <c>.txt</c> name
        /// and <c>_main.DownloadFileCount</c> is incremented once per match.
        /// </remarks>
        /// <param name="param">
        /// Positional arguments: [0] the page content, [1] the base folder passed on to
        /// <c>ContentManger.Save</c>, [2] the URL the content came from (only used for logging).
        /// </param>
        /// <returns>
        /// The anchor URLs followed by the image URLs, or <c>null</c> when fewer than three
        /// arguments are supplied.
        /// </returns>
        public List<string> ParseUrl(params object[] param)
        {
            if (param == null || param.Length < 3)
            {
                return null;
            }

            string content     = param[0].ToString();
            string baseForlder = param[1].ToString();
            string url         = param[2].ToString();

            MyConsole.AppendLine(string.Format("开始解析Url:{0}的内容", url));
            List<string> urls = new List<string>();

            // Matches() already yields an empty collection when nothing matches, so a
            // separate IsMatch() pass would only scan the content twice.
            foreach (Match anchor in HrefAttributeRegex.Matches(content))
            {
                string href = anchor.Groups[1].Value;
                if (href.Trim().Length > 0)
                {
                    urls.Add(href);
                }
            }
            MyConsole.AppendLine(string.Format("找到{0}个锚点..", urls.Count));

            MatchCollection images = ImgSrcRegex.Matches(content);
            foreach (Match image in images)
            {
                urls.Add(image.Groups["src"].Value);
            }
            MyConsole.AppendLine(string.Format("找到{0}个图片..", images.Count));

            // Store the text fragments requested by the user-configured patterns.
            List<ParseModel> parseModels = RegexCondition as List<ParseModel>;
            if (parseModels != null)
            {
                foreach (var item in parseModels)
                {
                    // An empty pattern matches at every position and would save one empty
                    // file per character; a null pattern would throw. Skip both.
                    if (item == null || string.IsNullOrEmpty(item.RegexString))
                    {
                        continue;
                    }

                    Regex extractor = new Regex(item.RegexString);
                    foreach (Match match in extractor.Matches(content))
                    {
                        // NOTE(review): Encoding.Default depends on the machine's code page;
                        // confirm whether consumers of the saved files expect that or UTF-8.
                        ContentManger.Save(baseForlder, Encoding.Default.GetBytes(match.Value), item.SaveType, Guid.NewGuid().ToString() + ".txt");
                        _main.DownloadFileCount++;
                    }
                }
            }

            return urls;
        }