/// <summary>
/// Converts the last received data from JSON to XML and loads it into the crawler named by
/// <c>CrawlerSelector</c>, offering to create that crawler when it does not exist yet.
/// </summary>
private void Design()
{
    // SafeCheck is used as a guard whose return value is the checked condition (see the JSON
    // check below); honor it here as well instead of continuing with a blank crawler name.
    if (!(!string.IsNullOrWhiteSpace(CrawlerSelector)).SafeCheck("采集器名称不能为空"))
    {
        return;
    }

    bool isRealJson;
    var newhtml = JavaScriptAnalyzer.Json2XML(lastData, out isRealJson, true);
    if (!isRealJson.SafeCheck("只有标准json格式才能启用采集器设计"))
    {
        return;
    }

    var selector = GetCrawler(CrawlerSelector);
    if (selector == null)
    {
        // No crawler with that name yet: ask the user before adding one to the current process list.
        var answer = MessageBox.Show($"是否要创建名为{CrawlerSelector}的网页采集器?", "提示信息", MessageBoxButton.OKCancel);
        if (answer != MessageBoxResult.OK)
        {
            return;
        }

        var crawler = new SmartCrawler();
        crawler.Name = CrawlerSelector;
        processManager.CurrentProcessCollections.Add(crawler);
        selector = crawler;
    }

    // Bringing the crawler's panel to the front is skipped when the main frame is not an
    // IDockableManager, rather than failing with a NullReferenceException.
    (MainDescription.MainFrm as IDockableManager)?.ActiveThisContent(CrawlerSelector);

    selector.URLHTML = newhtml;
    selector.HtmlDoc.LoadHtml(newhtml);
    selector.enableRefresh = false;
    // Left disabled, as in the original code:
    //selector.GreatHand();
}
/// <summary>
/// Resolves the crawler named by <c>CrawlerSelector</c>, loading the project task of the same
/// name when the crawler is not in the current process collection yet.
/// </summary>
/// <param name="datas">Documents forwarded unchanged to the base initialization.</param>
/// <returns>
/// <c>false</c> when neither a crawler nor a task with that name can be found; otherwise the
/// result of <c>base.Init</c>.
/// </returns>
public override bool Init(IEnumerable<IFreeDocument> datas)
{
    OneOutput = false;

    crawler = processManager.CurrentProcessCollections.FirstOrDefault(d => d.Name == CrawlerSelector) as SmartCrawler;
    if (crawler == null)
    {
        var task = processManager.CurrentProject.Tasks.FirstOrDefault(d => d.Name == CrawlerSelector);
        if (task == null)
        {
            return false;
        }

        // Presumably loading the task registers its crawler in CurrentProcessCollections,
        // which is why the lookup is repeated afterwards.
        ControlExtended.UIInvoke(() => { task.Load(false); });
        crawler = processManager.CurrentProcessCollections.FirstOrDefault(d => d.Name == CrawlerSelector) as SmartCrawler;
        if (crawler == null)
        {
            return false;
        }
    }

    // Set on both paths: previously a crawler that had to be loaded from its task left
    // IsMultiYield at whatever value it had before.
    IsMultiYield = crawler.IsMultiData == ListType.List;
    return base.Init(datas);
}
/// <summary>
/// Assembles the textual remark for a crawler: an optional "new task" line, the crawler's own
/// remark when it has one, the item remark from <c>GenerateItemRemark</c>, and one line per
/// crawl item.
/// </summary>
/// <param name="tool">Crawler being described.</param>
/// <param name="addnew">When <c>true</c>, the "doc_task_new" line is emitted first.</param>
/// <param name="manager">Not used by this method.</param>
/// <returns>All collected parts combined with the "\n" <c>Join</c> call.</returns>
public static string GenerateRemark(this SmartCrawler tool, bool addnew, IProcessManager manager)
{
    var parts = new List<string>();

    if (addnew)
    {
        var header = GlobalHelper.FormatArgs("doc_task_new", tool.TypeName, tool.Name);
        parts.Add(header);
    }

    if (!string.IsNullOrWhiteSpace(tool.Remark))
    {
        var reason = GlobalHelper.RandomFormatArgs("reason_desc", tool.Remark);
        parts.Add(reason);
    }

    var propertyNames = new List<string> { "Remark", "MainPluginLocation", "URL" };
    parts.Add(GenerateItemRemark(tool, propertyNames, false));
    parts.Add("\n");

    // Crawl items are numbered starting from 1.
    int position = 1;
    foreach (var entry in tool.CrawlItems)
    {
        var line = GlobalHelper.FormatArgs("doc_crawler_add_xpath", entry.Name, entry.Format, entry.CrawlType, entry.XPath, position);
        parts.Add(line);
        position++;
    }

    return "\n".Join(parts);
}
/// <summary>
/// Lazily yields the crawler that <paramref name="manager"/> returns for the name selected in
/// <c>etl.ShareCookie</c>; yields nothing when no such task is returned.
/// </summary>
/// <param name="etl">Crawler whose <c>ShareCookie</c> selection is looked up.</param>
/// <param name="manager">Process manager queried through <c>GetTask</c>.</param>
/// <returns>A sequence holding zero or one crawler.</returns>
public static IEnumerable<SmartCrawler> GetReference(this SmartCrawler etl, IProcessManager manager)
{
    var referenced = manager.GetTask<SmartCrawler>(etl.ShareCookie.SelectItem);
    if (referenced == null)
    {
        yield break;
    }

    yield return referenced;
}
/// <summary>
/// Resolves the crawler for the current <c>CrawlerSelector</c> selection and, when the
/// selection was empty, records the resolved crawler's name in it.
/// </summary>
/// <param name="datas">Documents forwarded unchanged to the base initialization.</param>
/// <returns><c>false</c> when no crawler is resolved; otherwise the result of <c>base.Init</c>.</returns>
public override bool Init(IEnumerable<IFreeDocument> datas)
{
    OneOutput = false;
    crawler = GetCrawler(CrawlerSelector.SelectItem);

    var selectionIsEmpty = string.IsNullOrEmpty(CrawlerSelector.SelectItem);
    if (crawler == null)
    {
        return false;
    }

    if (selectionIsEmpty)
    {
        CrawlerSelector.SelectItem = crawler.Name;
    }

    return base.Init(datas);
}
/// <summary>
/// Passes the selected crawler name through <c>AppHelper.Query</c>, resolves the crawler for
/// the result, and fills an empty selection with the resolved crawler's name.
/// </summary>
/// <param name="datas">Documents forwarded unchanged to the base initialization.</param>
/// <returns>The result of <c>base.Init</c>, whether or not a crawler was resolved.</returns>
public override bool Init(IEnumerable<IFreeDocument> datas)
{
    OneOutput = false;

    var selectedName = CrawlerSelector.SelectItem;
    var resolvedName = AppHelper.Query(selectedName, null);
    Crawler = GetCrawler(resolvedName);

    var selectionIsEmpty = string.IsNullOrEmpty(CrawlerSelector.SelectItem);
    if (selectionIsEmpty && Crawler != null)
    {
        CrawlerSelector.SelectItem = Crawler.Name;
    }

    return base.Init(datas);
}
/// <summary>
/// Decides whether a crawler is used for this run and derives <c>IsMultiYield</c> either from
/// that crawler or, when crawling is switched off, from <c>ScriptWorkMode</c>.
/// </summary>
/// <param name="docus">Documents forwarded unchanged to the base initialization.</param>
/// <returns>The result of <c>base.Init</c>.</returns>
public override bool Init(IEnumerable<IFreeDocument> docus)
{
    // The flag is reset first and only raised again once a crawler has actually been found.
    crawlerEnabled = false;

    if (!CrawlerEnabled)
    {
        IsMultiYield = ScriptWorkMode == ScriptWorkMode.文档列表;
    }
    else
    {
        selector = GetCrawler(CrawlerSelector);
        if (selector != null)
        {
            crawlerEnabled = true;
            IsMultiYield = selector.IsMultiData == ListType.List;
        }
    }

    lastData = null;
    return base.Init(docus);
}
/// <summary>
/// Builds a <see cref="SmartCrawler"/> from the first line of a configuration file.
/// </summary>
/// <remarks>
/// The line is split on '|' into at least six fields, read by position as:
/// name | url | output | depth | keywords | ignored keywords.
/// The two keyword fields are ';'-separated lists; empty entries are dropped.
/// The output field is appended to <c>CrawlerOutputFolder</c> by plain string concatenation.
/// </remarks>
/// <param name="pathConfig">Path handed to <c>SmartFile</c> to read the configuration.</param>
/// <returns>The crawler constructed from the parsed fields.</returns>
/// <exception cref="FormatException">
/// The file has no lines, the first line has fewer than six fields, or the depth field is not
/// an integer.
/// </exception>
public SmartCrawler LoadCrawler(string pathConfig)
{
    const int RequiredFieldCount = 6;

    var smartFile = new SmartFile(pathConfig);
    string line = smartFile.Lines.FirstOrDefault();
    if (line == null)
    {
        // Previously surfaced as a NullReferenceException on Split.
        throw new FormatException($"Crawler configuration '{pathConfig}' contains no lines.");
    }

    string[] fields = line.Split('|');
    if (fields.Length < RequiredFieldCount)
    {
        // Previously surfaced as an IndexOutOfRangeException on the first missing field.
        throw new FormatException(
            $"Crawler configuration '{pathConfig}' has {fields.Length} field(s); at least {RequiredFieldCount} are required.");
    }

    string crawlerName = fields[0].Trim();
    string crawlerUrl = fields[1].Trim();
    string crawlerOutput = fields[2].Trim();
    // The depth is machine-readable configuration, so parse it independently of the current culture.
    int depth = int.Parse(fields[3].Trim(), System.Globalization.CultureInfo.InvariantCulture);

    var listSeparator = new[] { ';' };
    string[] keyWords = fields[4].Split(listSeparator, StringSplitOptions.RemoveEmptyEntries);
    string[] ignoreKeyWords = fields[5].Split(listSeparator, StringSplitOptions.RemoveEmptyEntries);

    return new SmartCrawler(crawlerName, crawlerUrl, CrawlerOutputFolder + crawlerOutput, keyWords, ignoreKeyWords, depth);
}
/// <summary>
/// Resolves the generator named by <c>GEName</c> (once) and the crawler named by
/// <c>CrawlerSelector</c>, then resets the per-run state.
/// </summary>
/// <param name="datas">Documents forwarded unchanged to the base initialization.</param>
/// <returns>
/// <c>false</c> when neither a crawler nor a task named <c>CrawlerSelector</c> exists, or when
/// the crawler is still missing after the task was loaded; otherwise the result of
/// <c>base.Init</c>.
/// </returns>
public override bool Init(IEnumerable<IFreeDocument> datas)
{
    if (generator == null)
    {
        // Find the ETL flow that contains this tool, then the generator inside it.
        var mainstream = processManager.CurrentProcessCollections.OfType<SmartETLTool>()
            .FirstOrDefault(d => d.CurrentETLTools.Contains(this));

        // When no flow contains this tool the generator simply stays unresolved, exactly as it
        // does when no tool matches GEName, instead of throwing a NullReferenceException.
        generator = mainstream?.CurrentETLTools.FirstOrDefault(d => d.Name == GEName) as BfsGE;
    }

    crawler = processManager.CurrentProcessCollections.FirstOrDefault(d => d.Name == CrawlerSelector) as SmartCrawler;
    if (crawler == null)
    {
        var task = processManager.CurrentProject.Tasks.FirstOrDefault(d => d.Name == CrawlerSelector);
        if (task == null)
        {
            return false;
        }

        // Presumably loading the task registers its crawler in CurrentProcessCollections,
        // which is why the lookup is repeated afterwards.
        ControlExtended.UIInvoke(() => { task.Load(false); });
        crawler = processManager.CurrentProcessCollections.FirstOrDefault(d => d.Name == CrawlerSelector) as SmartCrawler;
    }

    // Single assignment covering both paths; evaluates to false when the crawler is still missing.
    IsMultiYield = crawler?.IsMultiData == ListType.List;
    isfirst = true;
    OneOutput = false;

    if (IsRegex)
    {
        regex = new Regex(Prefix);
    }

    return crawler != null && base.Init(datas);
}
/// <summary>
/// Looks up the crawler named by <c>CrawlerSelector</c>, loading the project task of the same
/// name when it is not in the current process collection, and creates a fresh
/// <c>HttpHelper</c>.
/// </summary>
/// <param name="datas">Documents forwarded unchanged to the base initialization.</param>
/// <returns>
/// <c>false</c> when neither a crawler nor a task with that name exists; otherwise the result
/// of <c>base.Init</c>.
/// </returns>
public override bool Init(IEnumerable<IFreeDocument> datas)
{
    var loaded = processManager.CurrentProcessCollections.FirstOrDefault(d => d.Name == CrawlerSelector);
    crawler = loaded as SmartCrawler;

    if (crawler == null)
    {
        var task = processManager.CurrentProject.Tasks.FirstOrDefault(d => d.Name == CrawlerSelector);
        if (task == null)
        {
            return false;
        }

        ControlExtended.UIInvoke(() => { task.Load(false); });

        // Repeat the lookup after the task has been loaded.
        var reloaded = processManager.CurrentProcessCollections.FirstOrDefault(d => d.Name == CrawlerSelector);
        crawler = reloaded as SmartCrawler;
    }

    helper = new HttpHelper();
    return base.Init(datas);
}
/// <summary>
/// Resolves the crawler named by <c>CrawlerSelector</c> before running the base initialization.
/// </summary>
/// <param name="datas">Documents forwarded unchanged to the base initialization.</param>
/// <returns><c>false</c> when no crawler is resolved; otherwise the result of <c>base.Init</c>.</returns>
public override bool Init(IEnumerable<IFreeDocument> datas)
{
    OneOutput = false;
    crawler = GetCrawler(CrawlerSelector);

    if (crawler == null)
    {
        return false;
    }

    return base.Init(datas);
}