Ejemplo n.º 1
0
        /// <summary>
        /// Pushes the most recent data (<c>lastData</c>), converted through
        /// <c>JavaScriptAnalyzer.Json2XML</c>, into the crawler named by <c>CrawlerSelector</c>
        /// and activates that crawler's content in the main frame.
        /// </summary>
        /// <remarks>
        /// When no crawler with that name is found, the user is asked whether a new
        /// <c>SmartCrawler</c> should be created and registered with the process manager.
        /// </remarks>
        private void Design()
        {
            // The result of this check used to be discarded, so an empty name fell through
            // to the "create crawler" prompt below. Stop here when validation fails.
            if (!(!string.IsNullOrWhiteSpace(CrawlerSelector)).SafeCheck("采集器名称不能为空"))
            {
                return;
            }

            bool isRealJson;
            var newhtml = JavaScriptAnalyzer.Json2XML(lastData, out isRealJson, true);
            if (!isRealJson.SafeCheck("只有标准json格式才能启用采集器设计"))
            {
                return;
            }

            var selector = GetCrawler(CrawlerSelector);
            if (selector == null)
            {
                var answer = MessageBox.Show($"是否要创建名为{CrawlerSelector}的网页采集器?", "提示信息",
                                             MessageBoxButton.OKCancel);
                if (answer != MessageBoxResult.OK)
                {
                    return;
                }

                var crawler = new SmartCrawler();
                crawler.Name = CrawlerSelector;
                processManager.CurrentProcessCollections.Add(crawler);
                selector = crawler;
            }

            // `as` yields null when the main frame is not an IDockableManager; skip the
            // activation in that case instead of throwing a NullReferenceException.
            (MainDescription.MainFrm as IDockableManager)?.ActiveThisContent(CrawlerSelector);

            selector.URLHTML = newhtml;
            selector.HtmlDoc.LoadHtml(newhtml);
            selector.enableRefresh = false;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Resolves the <c>SmartCrawler</c> named by <c>CrawlerSelector</c> and then runs the
        /// base initialization.
        /// </summary>
        /// <param name="datas">Documents forwarded unchanged to <c>base.Init</c>.</param>
        /// <returns>
        /// <c>false</c> when no crawler can be resolved (neither already loaded nor available
        /// as a project task); otherwise the result of <c>base.Init</c>.
        /// </returns>
        public override bool Init(IEnumerable<IFreeDocument> datas)
        {
            OneOutput = false;
            crawler = processManager.CurrentProcessCollections
                      .FirstOrDefault(d => d.Name == CrawlerSelector) as SmartCrawler;

            if (crawler == null)
            {
                // Not loaded yet: look for a project task with the same name and load it.
                var task = processManager.CurrentProject.Tasks.FirstOrDefault(d => d.Name == CrawlerSelector);
                if (task == null)
                {
                    return false;
                }

                ControlExtended.UIInvoke(() => { task.Load(false); });
                crawler = processManager.CurrentProcessCollections
                          .FirstOrDefault(d => d.Name == CrawlerSelector) as SmartCrawler;
                if (crawler == null)
                {
                    return false;
                }
            }

            // Previously this was only assigned when the crawler was already loaded, so a
            // crawler loaded on demand above left IsMultiYield at a stale value.
            IsMultiYield = crawler.IsMultiData == ListType.List;

            return base.Init(datas);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Assembles a newline-joined remark text for <paramref name="tool"/>.
        /// </summary>
        /// <param name="tool">Crawler whose remark, item remark and crawl items are described.</param>
        /// <param name="addnew">When true, a "doc_task_new" entry is emitted first.</param>
        /// <param name="manager">Not used by this method.</param>
        /// <returns>All generated entries joined with "\n".</returns>
        public static string GenerateRemark(this SmartCrawler tool, bool addnew, IProcessManager manager)
        {
            var remarkLines = new List<string>();

            if (addnew)
            {
                var header = GlobalHelper.FormatArgs("doc_task_new", tool.TypeName, tool.Name);
                remarkLines.Add(header);
            }

            if (!string.IsNullOrWhiteSpace(tool.Remark))
            {
                var reason = GlobalHelper.RandomFormatArgs("reason_desc", tool.Remark);
                remarkLines.Add(reason);
            }

            // Property names handed to GenerateItemRemark; their exact role is decided there.
            var propertyNames = new List<string> { "Remark", "MainPluginLocation", "URL" };
            remarkLines.Add(GenerateItemRemark(tool, propertyNames, false));

            remarkLines.Add("\n");

            // One entry per crawl item, numbered starting at 1.
            var position = 0;
            foreach (var item in tool.CrawlItems)
            {
                position++;
                remarkLines.Add(GlobalHelper.FormatArgs("doc_crawler_add_xpath", item.Name, item.Format,
                                                        item.CrawlType, item.XPath, position));
            }

            return "\n".Join(remarkLines);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Lazily yields the <c>SmartCrawler</c> task that <paramref name="manager"/> returns for
        /// <c>etl.ShareCookie.SelectItem</c>, or nothing when that lookup returns null.
        /// </summary>
        /// <param name="etl">Crawler whose <c>ShareCookie</c> selection is looked up.</param>
        /// <param name="manager">Process manager used to resolve the task.</param>
        /// <returns>A sequence with zero or one element.</returns>
        public static IEnumerable<SmartCrawler> GetReference(this SmartCrawler etl, IProcessManager manager)
        {
            var referenced = manager.GetTask<SmartCrawler>(etl.ShareCookie.SelectItem);
            if (referenced == null)
            {
                yield break;
            }

            yield return referenced;
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Resolves the crawler for the current <c>CrawlerSelector</c> selection and runs the base
 /// initialization.
 /// </summary>
 /// <param name="datas">Documents forwarded unchanged to <c>base.Init</c>.</param>
 /// <returns><c>false</c> when no crawler was resolved; otherwise the result of <c>base.Init</c>.</returns>
 public override bool Init(IEnumerable<IFreeDocument> datas)
 {
     OneOutput = false;
     crawler = GetCrawler(CrawlerSelector.SelectItem);

     var selectionIsBlank = string.IsNullOrEmpty(CrawlerSelector.SelectItem);
     if (crawler == null)
     {
         return false;
     }

     if (selectionIsBlank)
     {
         // GetCrawler returned a crawler although nothing was selected; record its name.
         CrawlerSelector.SelectItem = crawler.Name;
     }

     return base.Init(datas);
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Resolves <c>Crawler</c> from the current selection (after passing the selected name
        /// through <c>AppHelper.Query</c>) and runs the base initialization.
        /// </summary>
        /// <param name="datas">Documents forwarded unchanged to <c>base.Init</c>.</param>
        /// <returns>The result of <c>base.Init</c>, whether or not a crawler was resolved.</returns>
        public override bool Init(IEnumerable<IFreeDocument> datas)
        {
            OneOutput = false;

            var resolvedName = AppHelper.Query(CrawlerSelector.SelectItem, null);
            Crawler = GetCrawler(resolvedName);

            var selectionIsBlank = string.IsNullOrEmpty(CrawlerSelector.SelectItem);
            if (selectionIsBlank && Crawler != null)
            {
                // Nothing was selected but a crawler came back; record its name.
                CrawlerSelector.SelectItem = Crawler.Name;
            }

            return base.Init(datas);
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Resets per-run state, decides whether output is multi-yield, and runs the base
 /// initialization.
 /// </summary>
 /// <param name="docus">Documents forwarded unchanged to <c>base.Init</c>.</param>
 /// <returns>The result of <c>base.Init</c>.</returns>
 public override bool Init(IEnumerable<IFreeDocument> docus)
 {
     crawlerEnabled = false;

     if (!CrawlerEnabled)
     {
         // No crawler requested: the script work mode alone decides multi-yield.
         IsMultiYield = ScriptWorkMode == ScriptWorkMode.文档列表;
     }
     else
     {
         selector = GetCrawler(CrawlerSelector);
         crawlerEnabled = selector != null;
         if (crawlerEnabled)
         {
             IsMultiYield = selector.IsMultiData == ListType.List;
         }
     }

     lastData = null;
     return base.Init(docus);
 }
Ejemplo n.º 8
0
        /// <summary>
        /// Builds a <see cref="SmartCrawler"/> from the first line of the configuration file at
        /// <paramref name="pathConfig"/>.
        /// </summary>
        /// <remarks>
        /// The line is split on '|' and must contain at least six fields, read in this order:
        /// name, URL, output path (appended to <c>CrawlerOutputFolder</c>), depth (integer),
        /// keywords, and ignored keywords. The two keyword fields are ';'-separated lists;
        /// empty entries are dropped. Fields beyond the sixth are ignored.
        /// </remarks>
        /// <param name="pathConfig">Path handed to <c>SmartFile</c>.</param>
        /// <returns>The crawler constructed from the parsed fields.</returns>
        /// <exception cref="FormatException">
        /// The file has no usable first line, the line has fewer than six fields, or the depth
        /// field is not an integer.
        /// </exception>
        public SmartCrawler LoadCrawler(string pathConfig)
        {
            const int RequiredFieldCount = 6;

            var smartFile = new SmartFile(pathConfig);
            string firstLine = smartFile.Lines.FirstOrDefault();

            // FirstOrDefault returns null for an empty file; fail with a clear message
            // instead of a NullReferenceException on Split.
            if (string.IsNullOrWhiteSpace(firstLine))
            {
                throw new FormatException("Crawler configuration '" + pathConfig + "' has no content on its first line.");
            }

            string[] fields = firstLine.Split(new[] { '|' }, StringSplitOptions.None);
            if (fields.Length < RequiredFieldCount)
            {
                throw new FormatException(
                    "Crawler configuration '" + pathConfig + "' must have at least " + RequiredFieldCount +
                    " '|'-separated fields but has " + fields.Length + ".");
            }

            string crawlerName   = fields[0].Trim();
            string crawlerUrl    = fields[1].Trim();
            string crawlerOutput = fields[2].Trim();

            // The depth is machine-readable configuration, so parse it culture-independently.
            int depth = int.Parse(fields[3].Trim(), System.Globalization.CultureInfo.InvariantCulture);

            // RemoveEmptyEntries replaces the manual "non-null and non-empty" filter.
            string[] keywords       = fields[4].Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries);
            string[] ignoreKeywords = fields[5].Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries);

            return new SmartCrawler(crawlerName, crawlerUrl, CrawlerOutputFolder + crawlerOutput,
                                    keywords, ignoreKeywords, depth);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Resolves the generator named by <c>GEName</c> (once) and the crawler named by
        /// <c>CrawlerSelector</c>, resets per-run state, and runs the base initialization.
        /// </summary>
        /// <param name="datas">Documents forwarded unchanged to <c>base.Init</c>.</param>
        /// <returns>
        /// <c>false</c> when no crawler can be resolved; otherwise the result of <c>base.Init</c>.
        /// </returns>
        public override bool Init(IEnumerable<IFreeDocument> datas)
        {
            if (generator == null)
            {
                var mainstream = processManager.CurrentProcessCollections
                                 .OfType<SmartETLTool>()
                                 .FirstOrDefault(d => d.CurrentETLTools.Contains(this));

                // FirstOrDefault yields null when no SmartETLTool contains this tool; leave
                // the generator unresolved in that case instead of throwing.
                generator = mainstream?.CurrentETLTools.FirstOrDefault(d => d.Name == GEName) as BfsGE;
            }

            crawler = processManager.CurrentProcessCollections
                      .FirstOrDefault(d => d.Name == CrawlerSelector) as SmartCrawler;

            if (crawler == null)
            {
                // Not loaded yet: look for a project task with the same name and load it.
                var task = processManager.CurrentProject.Tasks.FirstOrDefault(d => d.Name == CrawlerSelector);
                if (task == null)
                {
                    return false;
                }

                ControlExtended.UIInvoke(() => { task.Load(false); });
                crawler = processManager.CurrentProcessCollections
                          .FirstOrDefault(d => d.Name == CrawlerSelector) as SmartCrawler;
            }

            // Single assignment covers both the already-loaded and the loaded-on-demand paths.
            IsMultiYield = crawler?.IsMultiData == ListType.List;
            isfirst      = true;
            OneOutput    = false;

            if (IsRegex)
            {
                regex = new Regex(Prefix);
            }

            return crawler != null && base.Init(datas);
        }
Ejemplo n.º 10
0
 /// <summary>
 /// Resolves the crawler named by <c>CrawlerSelector</c>, creates a fresh <c>HttpHelper</c>,
 /// and runs the base initialization.
 /// </summary>
 /// <param name="datas">Documents forwarded unchanged to <c>base.Init</c>.</param>
 /// <returns>
 /// <c>false</c> when no crawler can be resolved (neither already loaded nor obtainable by
 /// loading the project task of the same name); otherwise the result of <c>base.Init</c>.
 /// </returns>
 public override bool Init(IEnumerable<IFreeDocument> datas)
 {
     crawler = processManager.CurrentProcessCollections
               .FirstOrDefault(d => d.Name == CrawlerSelector) as SmartCrawler;

     if (crawler == null)
     {
         // Not loaded yet: look for a project task with the same name and load it.
         var task = processManager.CurrentProject.Tasks.FirstOrDefault(d => d.Name == CrawlerSelector);
         if (task == null)
         {
             return false;
         }

         ControlExtended.UIInvoke(() => { task.Load(false); });
         crawler = processManager.CurrentProcessCollections
                   .FirstOrDefault(d => d.Name == CrawlerSelector) as SmartCrawler;
     }

     helper = new HttpHelper();

     // A missing task already fails initialization above; a task that loads without
     // producing a SmartCrawler must fail too rather than report success with a null crawler.
     return crawler != null && base.Init(datas);
 }
Ejemplo n.º 11
0
 /// <summary>
 /// Resolves the crawler for <c>CrawlerSelector</c> and runs the base initialization.
 /// </summary>
 /// <param name="datas">Documents forwarded unchanged to <c>base.Init</c>.</param>
 /// <returns><c>false</c> when no crawler was resolved; otherwise the result of <c>base.Init</c>.</returns>
 public override bool Init(IEnumerable<IFreeDocument> datas)
 {
     OneOutput = false;
     crawler = GetCrawler(CrawlerSelector);

     if (crawler == null)
     {
         return false;
     }

     return base.Init(datas);
 }