/// <summary>
        /// Extracts the content of one task label from a collected page and post-processes it:
        /// image references are rewritten (and optionally queued for download), configured HTML
        /// is stripped, remove/replace rules are applied and, when configured, a Python plugin
        /// transforms the final text.
        /// </summary>
        /// <param name="taskName">Task name; used as a folder name inside the local image download path.</param>
        /// <param name="collectionContentStepTime">Passed through unchanged to <c>QueueImgHelper.AddImg</c>.</param>
        /// <param name="spiderViewUrl">URL of the collected page; used as base URL when the label has no <c>TestViewUrl</c>.</param>
        /// <param name="itemTaskLebel">Label configuration (cut regex, resource, filter, replace and plugin settings).</param>
        /// <param name="pageContent">Raw page content the label is cut from.</param>
        /// <param name="isTest">When <c>true</c>, image references are still rewritten but no download is queued.</param>
        /// <returns>
        /// The processed label content. In loop mode all matches are joined with <c>$$$$</c>;
        /// otherwise only the first match is used. Empty when the cut regex matches nothing.
        /// </returns>
        private string GetLabelContent(string taskName, int collectionContentStepTime, string spiderViewUrl, ModelTaskLabel itemTaskLebel, string pageContent, bool isTest = false)
        {
            // The label's own test URL wins; otherwise fall back to the URL of the page being collected.
            var remoteViewUrl = string.IsNullOrEmpty(itemTaskLebel.TestViewUrl)
                ? spiderViewUrl
                : itemTaskLebel.TestViewUrl;

            string regContent = HtmlHelper.Instance.ParseCollectionStrings(itemTaskLebel.LabelNameCutRegex);
            regContent = CommonHelper.ReplaceSystemRegexTag(regContent);

            #region Cut the label content (single match or loop)
            // Cut once and reuse the result for both the single-match and the loop case.
            string[] matches = CollectionHelper.Instance.CutStr(pageContent, regContent);

            string content;
            if (matches == null || matches.Length == 0)
            {
                // Nothing matched: continue with empty content instead of failing on matches[0].
                content = string.Empty;
            }
            else if (itemTaskLebel.IsLoop == 1)
            {
                // Loop mode: every match, separated by "$$$$". Built before the image handling so
                // that the first match is not emitted twice and all matches get their images rewritten.
                content = string.Join("$$$$", matches);
            }
            else
            {
                content = matches[0];
            }
            #endregion

            #region Resources (images)
            var imgTag = ImageDownHelper.GetImgTag(content);

            // string.Replace already rewrites every occurrence of a reference, so a repeated
            // reference must be handled only once; replacing again could corrupt a rewritten URL
            // (or queue a second, unreferenced download).
            var handledImgs = new System.Collections.Generic.HashSet<string>(StringComparer.Ordinal);

            if (itemTaskLebel.IsDownResource == 1)
            {
                // Only split the extension list when one is configured (it may be null or empty).
                bool     hasExtFilter = !string.IsNullOrEmpty(itemTaskLebel.DownResourceExts);
                string[] allowedExts  = hasExtFilter
                    ? itemTaskLebel.DownResourceExts.Split(new string[] { ";" }, StringSplitOptions.RemoveEmptyEntries)
                    : new string[0];
                var      downImgPath  = AppDomain.CurrentDomain.BaseDirectory + "Data\\Collection\\" + taskName + "\\Images\\";
                int      sequence     = 1;

                foreach (var img in imgTag)
                {
                    if (!handledImgs.Add(img))
                    {
                        continue;
                    }

                    var remoteImg = CollectionHelper.Instance.FormatUrl(remoteViewUrl, img);

                    if (hasExtFilter)
                    {
                        // Extension = everything from the last '.'; a URL without a dot has none
                        // and therefore never passes the filter.
                        int    dotIndex = remoteImg.LastIndexOf('.');
                        string imgExt   = dotIndex >= 0 ? remoteImg.Substring(dotIndex) : string.Empty;

                        // Case-insensitive membership test that tolerates duplicate filter entries.
                        if (!allowedExts.Any(x => string.Equals(x, imgExt, StringComparison.OrdinalIgnoreCase)))
                        {
                            // Filtered out: the reference stays exactly as it was in the page.
                            continue;
                        }
                    }

                    // NOTE(review): the local file name always ends in ".jpg", whatever the remote extension is.
                    var newImg = DateTime.Now.ToString("yyyyMMddHHmmssffffff") + "_" + sequence + ".jpg";

                    content = content.Replace(img, downImgPath + newImg);
                    if (!isTest)
                    {
                        QueueImgHelper.AddImg(Model.ID, downImgPath + newImg, remoteImg, collectionContentStepTime);
                    }
                    sequence++;
                }
            }
            else
            {
                // No download: only point each image reference at its formatted remote URL.
                foreach (var img in imgTag)
                {
                    if (!handledImgs.Add(img))
                    {
                        continue;
                    }

                    var remoteImg = CollectionHelper.Instance.FormatUrl(remoteViewUrl, img);
                    content = content.Replace(img, remoteImg);
                }
            }
            #endregion

            #region Filter HTML
            if (!string.IsNullOrEmpty(itemTaskLebel.LabelHtmlRemove))
            {
                string[] filters = itemTaskLebel.LabelHtmlRemove.Split(new string[] { "||||" }, StringSplitOptions.RemoveEmptyEntries);
                foreach (string filter in filters)
                {
                    if (filter == "all")
                    {
                        // "all" hands the whole content to NoHtml and ends the filter loop.
                        content = CollectionHelper.Instance.NoHtml(content);
                        break;
                    }
                    else if (filter == "table")
                    {
                        content = CollectionHelper.Instance.ScriptHtml(content, "table", 2);
                    }
                    else if (filter == "font<span>")
                    {
                        content = CollectionHelper.Instance.ScriptHtml(content, "font", 3);
                        content = CollectionHelper.Instance.ScriptHtml(content, "span", 3);
                    }
                    else if (filter == "a")
                    {
                        content = CollectionHelper.Instance.ScriptHtml(content, "a", 3);
                    }
                }
            }
            #endregion

            #region Remove text
            if (!string.IsNullOrEmpty(itemTaskLebel.LabelRemove))
            {
                foreach (string rule in itemTaskLebel.LabelRemove.Split(new string[] { "$$$$" }, StringSplitOptions.RemoveEmptyEntries))
                {
                    // Rule format: "<text>||<flag>". A rule with an empty <text> is malformed and skipped.
                    if (rule.StartsWith("||", StringComparison.Ordinal))
                    {
                        continue;
                    }

                    string[] parts = rule.Split(new string[] { "||" }, StringSplitOptions.RemoveEmptyEntries);

                    // Flag "1" routes the text to CollectionHelper.RemoveHtml; a missing or different
                    // flag means the text is removed literally.
                    if (parts.Length > 1 && parts[1] == "1")
                    {
                        content = CollectionHelper.RemoveHtml(content, parts[0]);
                    }
                    else
                    {
                        content = content.Replace(parts[0], "");
                    }
                }
            }
            #endregion

            #region Replace text
            if (!string.IsNullOrEmpty(itemTaskLebel.LabelReplace))
            {
                foreach (string rule in itemTaskLebel.LabelReplace.Split(new string[] { "$$$$" }, StringSplitOptions.RemoveEmptyEntries))
                {
                    // Rule format: "<old>||<new>". A rule with an empty <old> is malformed and skipped.
                    if (rule.StartsWith("||", StringComparison.Ordinal))
                    {
                        continue;
                    }

                    string[] parts = rule.Split(new string[] { "||" }, StringSplitOptions.RemoveEmptyEntries);

                    // "<old>||" loses its empty second part in the split; treat it as "replace with nothing".
                    string replacement = parts.Length > 1 ? parts[1] : string.Empty;
                    content = content.Replace(parts[0], replacement);
                }
            }
            #endregion

            #region Run plugin
            string SpiderLabelPlugin = itemTaskLebel.SpiderLabelPlugin;
            // "不使用插件" is the stored value meaning "do not use a plugin".
            if (SpiderLabelPlugin != "不使用插件" && !string.IsNullOrEmpty(SpiderLabelPlugin))
            {
                content = PythonExtHelper.RunPython(PluginUtility.SpiderContentPluginPath + SpiderLabelPlugin, new object[] { remoteViewUrl, content });
            }
            #endregion

            return content;
        }
// Example no. 2 (separator left over from the original snippet listing: "Esempio n. 2", "0")
 /// <summary>
 /// Event handler (for the Python test button, judging by its name) that runs the sample
 /// SpiderUrl plugin script with one fixed text argument.
 /// </summary>
 /// <param name="sender">Event source; not used.</param>
 /// <param name="e">Event data; not used.</param>
 private void btnTestPython_Click(object sender, EventArgs e)
 {
     const string scriptPath = @"Plugins\SpiderUrl\test.py";
     object[] scriptArgs = { "你好啊随碟附送大放送的" };

     // The script's result is kept in a local only; nothing reads it afterwards.
     var scriptResult = PythonExtHelper.RunPython(scriptPath, scriptArgs);
 }