/// <summary>
/// Extracts the content of one label from a page and post-processes it:
/// image references are rewritten, HTML is filtered, configured strings are
/// removed/replaced and, finally, an optional plugin script is applied.
/// </summary>
/// <param name="taskName">Task name; used as a folder name when building the local image path.</param>
/// <param name="collectionContentStepTime">Value forwarded to <c>QueueImgHelper.AddImg</c> for every queued image.</param>
/// <param name="spiderViewUrl">Page URL used as the base URL when the label has no <c>TestViewUrl</c>.</param>
/// <param name="itemTaskLebel">Label configuration (cut regex, resource, filter, replace and plugin settings).</param>
/// <param name="pageContent">Page content the label is cut from.</param>
/// <param name="isTest">When <c>true</c>, image paths are still rewritten but no image is queued for download.</param>
/// <returns>
/// The processed label content, or an empty string when the cut expression matches nothing.
/// In loop mode all matches are returned joined by <c>$$$$</c>.
/// </returns>
private string GetLabelContent(string taskName, int collectionContentStepTime, string spiderViewUrl, ModelTaskLabel itemTaskLebel, string pageContent, bool isTest = false)
{
    // The label's own test URL wins; otherwise fall back to the URL of the page being processed.
    var remoteViewUrl = string.IsNullOrEmpty(itemTaskLebel.TestViewUrl)
        ? spiderViewUrl
        : itemTaskLebel.TestViewUrl;

    string regContent = HtmlHelper.Instance.ParseCollectionStrings(itemTaskLebel.LabelNameCutRegex);
    regContent = CommonHelper.ReplaceSystemRegexTag(regContent);

    // Cut once and reuse the result for both single and loop mode.
    string[] matches = CollectionHelper.Instance.CutStr(pageContent, regContent);
    if (matches == null || matches.Length == 0)
    {
        // Nothing matched: there is no content to process (indexing [0] would throw).
        return string.Empty;
    }

    #region Loop result
    // Build the loop value up front so the first match is not duplicated and
    // every match goes through the resource / filter steps below.
    string cutContent = itemTaskLebel.IsLoop == 1
        ? string.Join("$$$$", matches)
        : matches[0];
    #endregion

    #region Download resources
    var imgTag = ImageDownHelper.GetImgTag(cutContent);
    if (itemTaskLebel.IsDownResource == 1)
    {
        // An empty extension list means "no filtering"; check it before splitting to avoid a null dereference.
        bool filterByExt = !string.IsNullOrEmpty(itemTaskLebel.DownResourceExts);
        string[] allowedExts = filterByExt
            ? itemTaskLebel.DownResourceExts.Split(new string[] { ";" }, StringSplitOptions.RemoveEmptyEntries)
            : new string[0];
        var downImgPath = AppDomain.CurrentDomain.BaseDirectory + "Data\\Collection\\" + taskName + "\\Images\\";
        int imgIndex = 1;

        foreach (var img in imgTag)
        {
            var remoteImg = CollectionHelper.Instance.FormatUrl(remoteViewUrl, img);

            if (filterByExt)
            {
                // A URL without a dot has no extension and therefore never passes the filter.
                int dotIndex = remoteImg.LastIndexOf('.');
                string imgExt = dotIndex >= 0 ? remoteImg.Substring(dotIndex) : string.Empty;
                bool allowed = allowedExts.Any(x => string.Equals(x, imgExt, StringComparison.OrdinalIgnoreCase));
                if (!allowed)
                {
                    continue;
                }
            }

            var newImg = DateTime.Now.ToString("yyyyMMddHHmmssffffff") + "_" + imgIndex + ".jpg";
            cutContent = cutContent.Replace(img, downImgPath + newImg);
            if (!isTest)
            {
                QueueImgHelper.AddImg(Model.ID, downImgPath + newImg, remoteImg, collectionContentStepTime);
            }
            imgIndex++;
        }
    }
    else
    {
        // Not downloading: replace each image reference with the URL produced by FormatUrl.
        foreach (var img in imgTag)
        {
            var remoteImg = CollectionHelper.Instance.FormatUrl(remoteViewUrl, img);
            cutContent = cutContent.Replace(img, remoteImg);
        }
    }
    #endregion

    #region Filter HTML
    if (!string.IsNullOrEmpty(itemTaskLebel.LabelHtmlRemove))
    {
        string[] htmlFilters = itemTaskLebel.LabelHtmlRemove.Split(new string[] { "||||" }, StringSplitOptions.RemoveEmptyEntries);
        foreach (string filter in htmlFilters)
        {
            if (filter == "all")
            {
                // "all" strips everything, so the remaining filters are irrelevant.
                cutContent = CollectionHelper.Instance.NoHtml(cutContent);
                break;
            }
            else if (filter == "table")
            {
                cutContent = CollectionHelper.Instance.ScriptHtml(cutContent, "table", 2);
            }
            else if (filter == "font<span>")
            {
                cutContent = CollectionHelper.Instance.ScriptHtml(cutContent, "font", 3);
                cutContent = CollectionHelper.Instance.ScriptHtml(cutContent, "span", 3);
            }
            else if (filter == "a")
            {
                cutContent = CollectionHelper.Instance.ScriptHtml(cutContent, "a", 3);
            }
        }
    }
    #endregion

    #region Remove strings
    if (!string.IsNullOrEmpty(itemTaskLebel.LabelRemove))
    {
        // Rules are separated by "$$$$"; each rule is "<text>||<flag>".
        foreach (string rule in itemTaskLebel.LabelRemove.Split(new string[] { "$$$$" }, StringSplitOptions.RemoveEmptyEntries))
        {
            string[] parts = rule.Split(new string[] { "||" }, StringSplitOptions.RemoveEmptyEntries);
            if (parts.Length < 2)
            {
                // Malformed rule (text or flag missing): skip it instead of failing the whole label.
                continue;
            }

            if (parts[1] == "1")
            {
                cutContent = CollectionHelper.RemoveHtml(cutContent, parts[0]);
            }
            else
            {
                cutContent = cutContent.Replace(parts[0], "");
            }
        }
    }
    #endregion

    #region Replace strings
    if (!string.IsNullOrEmpty(itemTaskLebel.LabelReplace))
    {
        // Rules are separated by "$$$$"; each rule is "<old>||<new>".
        foreach (string rule in itemTaskLebel.LabelReplace.Split(new string[] { "$$$$" }, StringSplitOptions.RemoveEmptyEntries))
        {
            string[] parts = rule.Split(new string[] { "||" }, StringSplitOptions.RemoveEmptyEntries);
            if (parts.Length < 2)
            {
                // Malformed rule (one side missing): skip it instead of failing the whole label.
                continue;
            }

            cutContent = cutContent.Replace(parts[0], parts[1]);
        }
    }
    #endregion

    #region Load plugin
    string spiderLabelPlugin = itemTaskLebel.SpiderLabelPlugin;
    if (spiderLabelPlugin != "不使用插件" && !string.IsNullOrEmpty(spiderLabelPlugin))
    {
        // The plugin script receives the base URL and the content processed so far; its result replaces the content.
        cutContent = PythonExtHelper.RunPython(PluginUtility.SpiderContentPluginPath + spiderLabelPlugin, new object[] { remoteViewUrl, cutContent });
    }
    #endregion

    return cutContent;
}
/// <summary>
/// Event-handler-shaped method (presumably wired to the btnTestPython button's Click event)
/// that runs the sample Python script with a fixed test string.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void btnTestPython_Click(object sender, EventArgs e)
{
    const string scriptPath = @"Plugins\SpiderUrl\test.py";
    object[] scriptArgs = { "你好啊随碟附送大放送的" };

    // The result is only captured in a local; nothing in this handler reads it.
    var output = PythonExtHelper.RunPython(scriptPath, scriptArgs);
}