Exemple #1
0
        /// <summary>
        /// Adds a crawl item built from the current selection (SelectXPath, SelectName, SelectText).
        /// </summary>
        /// <remarks>
        /// When RootXPath is set, the selected node must be a descendant of the parent of the node
        /// matched by RootXPath; the stored path then has that parent's XPath taken off.
        /// </remarks>
        /// <param name="isAlert">
        /// When true, problems are reported with a message box and a duplicate name aborts the add.
        /// When false, failures are silent and a duplicate name is replaced by a generated one.
        /// </param>
        private void AddNewItem(bool isAlert = true)
        {
            var path = SelectXPath;

            if (!string.IsNullOrEmpty(RootXPath))
            {
                // Either query may match nothing; treat that like "not a descendant" instead of
                // failing with a NullReferenceException.
                var root = HtmlDoc.DocumentNode.SelectSingleNode(RootXPath)?.ParentNode;
                var node = HtmlDoc.DocumentNode.SelectSingleNode(path);
                if (root == null || node == null || !node.IsAncestor(root))
                {
                    if (isAlert)
                    {
                        MessageBox.Show("当前XPath所在节点不是父节点的后代,请检查对应的XPath");
                    }
                    return;
                }
                path = new XPath(node.XPath).TakeOff(root.XPath).ToString();
            }

            var item = new CrawlItem {
                XPath = path, Name = SelectName, SampleData1 = SelectText
            };

            if (CrawlItems.Any(d => d.Name == SelectName))
            {
                // Remember the conflicting name before SelectName is replaced by the suggestion,
                // so the message names the property that actually already exists.
                var duplicateName = SelectName;
                SelectName = "属性" + CrawlItems.Count;
                if (isAlert)
                {
                    MessageBox.Show($"已存在名称为{duplicateName}的属性,不能重复添加");
                    return;
                }
                // Silent mode: use the generated name so the new item does not duplicate an existing one.
                item.Name = SelectName;
            }
            CrawlItems.Add(item);
            SelectXPath = "";
        }
Exemple #2
0
        /// <summary>
        /// Restores this object's settings from a dictionary, then rebuilds the crawl items from the
        /// children of the source document.
        /// </summary>
        /// <param name="dicts">Source values; child items are read only when it is a FreeDocument.</param>
        /// <param name="scenario">Passed through to the base implementation.</param>
        public override void DictDeserialize(IDictionary<string, object> dicts, Scenario scenario = Scenario.Database)
        {
            base.DictDeserialize(dicts, scenario);
            URL                    = dicts.Set("URL", URL);
            RootXPath              = dicts.Set("RootXPath", RootXPath);
            Remark                 = dicts.Set("Remark", Remark);
            RootFormat             = dicts.Set("RootFormat", RootFormat);
            ShareCookie.SelectItem = dicts.Set("ShareCookie", ShareCookie.SelectItem);
            IsMultiData            = dicts.Set("IsMultiData", IsMultiData);
            IsSuperMode            = dicts.Set("IsSuperMode", IsSuperMode);

            // Single lookup; only hand the value on when it really is a dictionary, since the
            // "as" cast yields null for any other payload.
            object httpSet;
            if (dicts.TryGetValue("HttpSet", out httpSet))
            {
                var httpValues = httpSet as IDictionary<string, object>;
                if (httpValues != null)
                {
                    Http.UnsafeDictDeserialize(httpValues);
                }
            }

            // The former "Generator" branch only read the value into unused locals and was removed.

            var doc = dicts as FreeDocument;
            if (doc?.Children == null)
            {
                return;
            }

            foreach (var child in doc.Children)
            {
                var item = new CrawlItem();
                item.DictDeserialize(child);
                CrawlItems.Add(item);
            }
        }
Exemple #3
0
        /// <summary>
        /// Opens a maximized modal editor on a cloned copy of the current crawl items and, when the
        /// dialog result is true, replaces RootXPath and CrawlItems with the edited target.
        /// </summary>
        public void EditProperty()
        {
            // Work on clones so the live items stay untouched unless the dialog is confirmed.
            var snapshot = new XPathAnalyzer.CrawTarget(CrawlItems.Select(d => d.Clone()).ToList(), RootXPath,
                                                        RootFormat)
            {
                RootNode = this.HtmlDoc.DocumentNode,
                WorkMode = IsMultiData
            };
            var candidates = new List<XPathAnalyzer.CrawTarget> { snapshot };

            var model = new FeelLuckyModel(candidates, HtmlDoc, IsMultiData)
            {
                CanChange = false
            };

            var panel = PluginProvider.GetObjectInstance<ICustomView>(GlobalHelper.Get("key_657")) as UserControl;
            panel.DataContext = model;

            var dialog = new Window
            {
                Title       = GlobalHelper.Get("key_658"),
                WindowState = WindowState.Maximized,
                Content     = panel
            };
            model.SetView(panel, dialog);

            dialog.Activate();
            dialog.ShowDialog();
            if (dialog.DialogResult != true)
            {
                return;
            }

            CrawlItems.Clear();
            RootXPath = model.CurrentTarget.RootXPath;
            CrawlItems.AddRange(model.CurrentTarget.CrawItems);
        }
Exemple #4
0
        /// <summary>
        /// Restores this object's settings from a dictionary, then rebuilds the crawl items from the
        /// children of the source document.
        /// </summary>
        /// <param name="dicts">Source values; child items are read only when it is a FreeDocument.</param>
        /// <param name="scenario">Passed through to the base implementation.</param>
        public override void DictDeserialize(IDictionary<string, object> dicts, Scenario scenario = Scenario.Database)
        {
            base.DictDeserialize(dicts, scenario);
            URL         = dicts.Set("URL", URL);
            RootXPath   = dicts.Set("RootXPath", RootXPath);
            IsMultiData = dicts.Set("IsMultiData", IsMultiData);
            IsJson2xml  = dicts.Set("IsJson2xml", IsJson2xml);
            Crawler     = dicts.Set("Crawler", Crawler);

            // Single lookup; only hand the value on when it really is a dictionary, since the
            // "as" cast yields null for any other payload.
            object httpSet;
            if (dicts.TryGetValue("HttpSet", out httpSet))
            {
                var httpValues = httpSet as IDictionary<string, object>;
                if (httpValues != null)
                {
                    Http.UnsafeDictDeserialize(httpValues);
                }
            }

            // The former "Generator" branch only read the value into unused locals and was removed.

            var doc = dicts as FreeDocument;
            if (doc?.Children == null)
            {
                return;
            }

            foreach (var child in doc.Children)
            {
                var item = new CrawlItem();
                item.DictDeserialize(child);
                CrawlItems.Add(item);
            }
        }
Exemple #5
0
        /// <summary>
        /// Adds a crawl item for the current selection. When RootXPath is set, the stored path has the
        /// XPath of the root match's parent taken off.
        /// </summary>
        /// <param name="isAlert">
        /// When true, the user is asked to confirm adding a node that is not a descendant of the
        /// root's parent.
        /// </param>
        private void AddNewItem(bool isAlert = true)
        {
            var path = SelectXPath;

            if (!string.IsNullOrEmpty(RootXPath))
            {
                // The query is wrapped because a malformed expression is expected to throw;
                // the failure is logged and then handled by the null check below.
                HtmlNode rootMatch = null;
                try
                {
                    rootMatch = HtmlDoc.DocumentNode.SelectSingleNode(RootXPath);
                }
                catch (Exception)
                {
                    XLogSys.Print.Error($"{RootXPath}  不能被识别为正确的XPath表达式,请检查");
                }
                if (!(rootMatch != null).SafeCheck("使用当前父节点XPath,在文档中找不到任何父节点"))
                {
                    return;
                }

                // Reuse the node found above instead of querying again. A match without a parent
                // cannot anchor a relative path, so it is reported like a missing parent node.
                var root = rootMatch.ParentNode;
                if (!(root != null).SafeCheck("使用当前父节点XPath,在文档中找不到任何父节点"))
                {
                    return;
                }

                HtmlNode node = null;
                if (
                    !ControlExtended.SafeInvoke(() => HtmlDoc.DocumentNode.SelectSingleNode(path), ref node,
                                                LogType.Info, "检查子节点XPath正确性", true))
                {
                    return;
                }
                if (!(node != null).SafeCheck("使用当前子节点XPath,在文档中找不到任何子节点"))
                {
                    return;
                }

                if (!node.IsAncestor(root) && isAlert)
                {
                    if (
                        MessageBox.Show("当前XPath所在节点不是父节点的后代,请检查对应的XPath,是否依然要添加?", "提示信息", MessageBoxButton.YesNo) ==
                        MessageBoxResult.No)
                    {
                        return;
                    }
                }
                path = XPath.TakeOff(node.XPath, root.XPath);
            }

            // A name clash is allowed only after explicit confirmation.
            var nameTaken = CrawlItems.Any(d => d.Name == SelectName);
            if (nameTaken &&
                MessageBox.Show("已经存在同名的属性,是否依然添加?", "提示信息", MessageBoxButton.OKCancel) != MessageBoxResult.OK)
            {
                return;
            }

            var item = new CrawlItem {
                XPath = path, Name = SelectName, SampleData1 = SelectText
            };

            CrawlItems.Add(item);

            SelectXPath = "";
            SelectName  = "";
            XLogSys.Print.Info("成功添加属性");
        }
Exemple #6
0
        //public void AutoVisit()
        //{
        //    if (Documents.Any())
        //    {
        //        var item = new HttpItem();
        //        Documents[0].DictCopyTo(item);
        //        var res = helper != null && helper.AutoVisit(item);
        //        XLogSys.Print.Info("成功模拟登录");
        //        Http.SetValue("Cookie", item.GetValue("Cookie"));
        //        if (res)
        //        {
        //            URL = item.URL;
        //        }
        //    }
        //}

        /// <summary>
        /// Takes the first item set suggested by SearchPropertiesSmart, shows the extracted data in an
        /// editable list whose first row holds the property names, and on confirmation renames the
        /// items accordingly and replaces CrawlItems with them.
        /// </summary>
        private void GreatHand()
        {
            var crawitems = HtmlDoc.SearchPropertiesSmart(CrawlItems, IsAttribute).FirstOrDefault();

            if (!(crawitems != null).SafeCheck("网页属性获取", LogType.Info))
            {
                return;
            }

            var datas = HtmlDoc.GetDataFromXPath(crawitems, IsMultiData);

            // First row maps every property name to itself; edited values become the new names.
            var propertyNames = new FreeDocument(datas.GetKeys().ToDictionary(d => d, d => (object)d));

            datas.Insert(0, propertyNames);
            var view = PluginProvider.GetObjectInstance<IDataViewer>("可编辑列表");
            var r    = view.SetCurrentView(datas);

            var name   = "手气不错_可修改第一列的属性名称";
            var window = new Window {
                Title = name
            };

            window.Content  = r;
            window.Closing += (s, e) =>
            {
                if (ControlExtended.UserCheck("是否确认选择当前的数据表") == false)
                {
                    return;
                }

                // Snapshot the original names first: looking items up by their live Name while
                // renaming would let an earlier rename (a -> b) capture the later lookup of "b".
                var originals = crawitems.Select(d => new { Item = d, OriginalName = d.Name }).ToList();
                foreach (var propertyName in propertyNames)
                {
                    if (propertyName.Value == null)
                    {
                        continue;
                    }
                    var match = originals.FirstOrDefault(o => o.OriginalName == propertyName.Key);
                    if (match == null)
                    {
                        continue;
                    }
                    match.Item.Name = propertyName.Value.ToString();
                }
                CrawlItems.Clear();
                CrawlItems.AddRange(crawitems);
            };

            window.ShowDialog();
        }
Exemple #7
0
        /// <summary>
        /// Adds a crawl item for the current selection. When RootXPath is set, the stored path has the
        /// XPath of the root match's parent taken off.
        /// </summary>
        /// <param name="isAlert">
        /// When true, the user is asked to confirm a node that is not a descendant of the root's
        /// parent, and a duplicate name aborts the add with a message. When false, a duplicate name
        /// is replaced by a generated one.
        /// </param>
        private void AddNewItem(bool isAlert = true)
        {
            var path = SelectXPath;

            if (!string.IsNullOrEmpty(RootXPath))
            {
                var rootMatch = HtmlDoc.DocumentNode.SelectSingleNode(RootXPath);
                if (!(rootMatch != null).SafeCheck("使用当前父节点XPath,在文档中找不到任何父节点"))
                {
                    return;
                }

                // Reuse the node found above instead of querying again. A match without a parent
                // cannot anchor a relative path, so it is reported like a missing parent node.
                var root = rootMatch.ParentNode;
                if (!(root != null).SafeCheck("使用当前父节点XPath,在文档中找不到任何父节点"))
                {
                    return;
                }

                var node = HtmlDoc.DocumentNode.SelectSingleNode(path);
                if (!(node != null).SafeCheck("使用当前子节点XPath,在文档中找不到任何子节点"))
                {
                    return;
                }

                var isDescendant = node.IsAncestor(root);
                if (!isDescendant && isAlert &&
                    MessageBox.Show("当前XPath所在节点不是父节点的后代,请检查对应的XPath,是否依然要添加?", "提示信息", MessageBoxButton.YesNo) !=
                    MessageBoxResult.Yes)
                {
                    return;
                }

                // Previously the relative path was computed only after the user confirmed a
                // non-descendant node, so ordinary descendants kept their absolute path.
                // A non-descendant added silently keeps its path unchanged, as before.
                if (isDescendant || isAlert)
                {
                    path = new XPath(node.XPath).TakeOff(root.XPath).ToString();
                }
            }

            var item = new CrawlItem {
                XPath = path, Name = SelectName, SampleData1 = SelectText
            };

            if (CrawlItems.Any(d => d.Name == SelectName))
            {
                // Remember the conflicting name before SelectName is replaced by the suggestion,
                // so the message names the property that actually already exists.
                var duplicateName = SelectName;
                SelectName = "属性" + CrawlItems.Count;
                if (isAlert)
                {
                    MessageBox.Show($"已存在名称为{duplicateName}的属性,不能重复添加");
                    return;
                }
                // Silent mode: use the generated name so the new item does not duplicate an existing one.
                item.Name = SelectName;
            }
            CrawlItems.Add(item);
            SelectXPath = "";
        }
Exemple #8
0
        /// <summary>
        /// Serializes the base state plus URL, RootXPath, IsMultiData, IsSuperMode and the HTTP
        /// settings, with one child document per crawl item.
        /// </summary>
        /// <param name="scenario">Passed to the base implementation and to every crawl item.</param>
        /// <returns>The populated document.</returns>
        public override FreeDocument DictSerialize(Scenario scenario = Scenario.Database)
        {
            var document = base.DictSerialize(scenario);

            document.Add("URL", URL);
            document.Add("RootXPath", RootXPath);
            document.Add("IsMultiData", IsMultiData);
            document.Add("IsSuperMode", IsSuperMode);
            document.Add("HttpSet", Http.DictSerialize());

            // Children always start from a fresh list; items are serialized while being appended.
            document.Children = new List<FreeDocument>();
            var serializedItems = CrawlItems.Select(crawlItem => crawlItem.DictSerialize(scenario));
            document.Children.AddRange(serializedItems);

            return document;
        }
 /// <summary>
 /// Replaces CrawlItems with one item per XPath returned by searching the document for SelectText.
 /// Does nothing when the selectText field is null, empty or whitespace.
 /// </summary>
 private void Search()
 {
     if (string.IsNullOrWhiteSpace(selectText))
     {
         return;
     }

     var matches = HtmlDoc.SearchXPath(SelectText, () => true).ToList();
     CrawlItems.Clear();
     matches.Execute(xpath =>
     {
         // The sample text is the inner text of the node the XPath resolves to.
         var hit = HtmlDoc.DocumentNode.SelectSingleNodePlus(xpath, SelectorFormat.XPath);
         CrawlItems.Add(new CrawlItem
         {
             XPath       = xpath,
             SampleData1 = hit.InnerText
         });
     });
 }
Exemple #10
0
 /// <summary>
 /// Serializes the base state plus the crawler settings, the first login document (if any)
 /// and one child document per crawl item.
 /// </summary>
 /// <param name="scenario">Passed to the base implementation and to every crawl item.</param>
 /// <returns>The populated document.</returns>
 public override FreeDocument DictSerialize(Scenario scenario = Scenario.Database)
 {
     var document = base.DictSerialize(scenario);

     document.Add("URL", URL);
     document.Add("RootXPath", RootXPath);
     document.Add("IsMultiData", IsMultiData);
     document.Add("HttpSet", Http.DictSerialize());
     document.Add("URLFilter", URLFilter);
     document.Add("ContentFilter", ContentFilter);
     document.Add("Crawler", Crawler);

     document.Children = new List<FreeDocument>();

     // Only the first entry of Documents is stored, under the "Login" key.
     if (Documents.Any())
     {
         document.Add("Login", Documents[0].DictSerialize());
     }

     var serializedItems = CrawlItems.Select(crawlItem => crawlItem.DictSerialize(scenario));
     document.Children.AddRange(serializedItems);

     return document;
 }
Exemple #11
0
        /// <summary>
        /// Starts a temporary task that collects candidate crawl targets and their data. When the
        /// task finishes, shows them in a maximized modal dialog and, when the dialog result is true,
        /// adopts the chosen target's root XPath and enabled items.
        /// </summary>
        public void FeelLucky()
        {
            isBusy = true;

            var candidates = new List<XPathAnalyzer.CrawTarget>();
            var search     = HtmlDoc.SearchPropertiesSmart(CrawlItems, RootXPath, IsAttribute);

            var task = TemporaryTask.AddTempTask("网页结构计算中", search, found =>
            {
                // Collect every candidate together with the data it extracts.
                candidates.Add(found);
                found.Datas = HtmlDoc.GetDataFromXPath(found.CrawItems, IsMultiData, found.RootXPath);
            }, finished =>
            {
                isBusy = false;
                if (candidates.Count == 0)
                {
                    CrawTarget = null;
                    XLogSys.Print.Warn("没有检查到任何可选的列表页面");
                    return;
                }

                var model = new FeelLuckyModel(candidates, HtmlDoc);
                var panel = PluginProvider.GetObjectInstance<ICustomView>("手气不错面板") as UserControl;
                panel.DataContext = model;

                var dialog = new Window
                {
                    Title       = "手气不错",
                    WindowState = WindowState.Maximized,
                    Content     = panel
                };
                model.SetView(panel, dialog);
                dialog.Activate();
                dialog.ShowDialog();
                if (dialog.DialogResult != true)
                {
                    return;
                }

                var chosen = model.CurrentTarget;
                RootXPath  = chosen.RootXPath;
                CrawlItems.Clear();
                CrawlItems.AddRange(chosen.CrawItems.Where(r => r.IsEnabled));
            });

            SysProcessManager.CurrentProcessTasks.Add(task);
        }
Exemple #12
0
        /// <summary>
        /// Adds a crawl item built from the current selection (SelectXPath, SelectName, SelectText).
        /// </summary>
        /// <param name="isAlert">
        /// When true, a duplicate name aborts the add with a message. When false, a duplicate name is
        /// replaced by a generated one.
        /// </param>
        private void AddNewItem(bool isAlert = true)
        {
            var item = new CrawlItem {
                XPath = SelectXPath, Name = SelectName, SampleData1 = SelectText
            };

            if (CrawlItems.Any(d => d.Name == SelectName))
            {
                // Remember the conflicting name before SelectName is replaced by the suggestion,
                // so the message names the property that actually already exists.
                var duplicateName = SelectName;
                SelectName = "属性" + CrawlItems.Count;
                if (isAlert)
                {
                    MessageBox.Show($"已存在名称为{duplicateName}的属性,不能重复添加");
                    return;
                }
                // Silent mode: use the generated name so the new item does not duplicate an existing one.
                item.Name = SelectName;
            }
            CrawlItems.Add(item);
            SelectXPath = "";
        }
Exemple #13
0
        /// <summary>
        /// Adds a crawl item for the current selection. When RootXPath is set and the search format
        /// is XPath, the stored path is made relative to the parent of the root match and any
        /// attribute selector found in the original path is appended again.
        /// </summary>
        /// <param name="isAlert">
        /// When true, the user is asked to confirm adding a node that is not a descendant of the
        /// root's parent.
        /// </param>
        private void AddNewItem(bool isAlert = true)
        {
            var path     = SelectXPath;
            var rootPath = RootXPath;

            if (!string.IsNullOrEmpty(rootPath))
            {
                // The query is wrapped because a malformed selector is expected to throw;
                // the failure is logged and then handled by the null check below.
                HtmlNode rootMatch = null;
                try
                {
                    rootMatch = HtmlDoc.DocumentNode.SelectSingleNodePlus(rootPath, RootFormat);
                }
                catch (Exception)
                {
                    XLogSys.Print.Error(string.Format(GlobalHelper.Get("key_662"), RootXPath, RootFormat));
                }
                if (!(rootMatch != null).SafeCheck(string.Format(GlobalHelper.Get("key_663"), RootFormat, RootXPath)))
                {
                    return;
                }

                // Reuse the node found above instead of evaluating the selector again. A match
                // without a parent cannot anchor a relative path, so it is reported the same way.
                var root = rootMatch.ParentNode;
                if (!(root != null).SafeCheck(string.Format(GlobalHelper.Get("key_663"), RootFormat, RootXPath)))
                {
                    return;
                }

                HtmlNode node = null;
                if (
                    !ControlExtended.SafeInvoke(() => HtmlDoc.DocumentNode.SelectSingleNodePlus(path, SearchFormat),
                                                ref node,
                                                LogType.Info, GlobalHelper.Get("key_664"), true))
                {
                    return;
                }
                if (!(node != null).SafeCheck(GlobalHelper.Get("key_665")))
                {
                    return;
                }

                if (!node.IsAncestor(root) && isAlert)
                {
                    if (
                        MessageBox.Show(GlobalHelper.Get("key_666"), GlobalHelper.Get("key_99"), MessageBoxButton.YesNo) ==
                        MessageBoxResult.No)
                    {
                        return;
                    }
                }

                // attrValue is required by the out signature but not used here.
                string attr      = "";
                string attrValue = "";
                XPathAnalyzer.GetAttribute(path, out attr, out attrValue);
                if (SearchFormat == SelectorFormat.XPath)
                {
                    path = XPath.TakeOffPlus(node.XPath, root.XPath);
                    if (attr != "")
                    {
                        path += "/@" + attr + "[1]";
                    }
                }
            }

            // A name clash is allowed only after explicit confirmation.
            var nameTaken = CrawlItems.Any(d => d.Name == SelectName);
            if (nameTaken &&
                MessageBox.Show(GlobalHelper.Get("add_column_sure"), GlobalHelper.Get("key_99"), MessageBoxButton.OKCancel) != MessageBoxResult.OK)
            {
                return;
            }

            var item = new CrawlItem {
                XPath = path, Name = SelectName, SampleData1 = SelectText
            };
            item.Format = SearchFormat;
            CrawlItems.Add(item);

            SelectXPath = "";
            SelectName  = "";

            XLogSys.Print.Info(GlobalHelper.Get("key_668"));
        }
Exemple #14
0
        /// <summary>
        /// Starts a temporary task that collects candidate crawl targets. When the task finishes,
        /// shows them in a maximized modal dialog and, when the dialog result is true, adopts the
        /// chosen root XPath and appends its enabled items whose XPath is not already present.
        /// </summary>
        public void FeelLucky()
        {
            if (string.IsNullOrEmpty(this.URLHTML))
            {
                // NOTE(review): the call is not awaited here, so the search below may start before
                // the page content is available — confirm this is intended.
                this.VisitUrlAsync();
            }
            isBusy = true;
            var crawTargets = new List<XPathAnalyzer.CrawTarget>();
            ICollection<CrawlItem> existItems = CrawlItems;

            if (IsMultiData == ScriptWorkMode.One)
            {
                // In single-item mode the search is seeded with one placeholder item for the
                // current selection. (The terminating semicolon used to sit outside the block.)
                existItems = new List<CrawlItem>
                {
                    new CrawlItem
                    {
                        Name = "temp", XPath = SelectXPath
                    }
                };
            }

            var task = TemporaryTask<XPathAnalyzer.CrawTarget>.AddTempTaskSimple(GlobalHelper.Get("key_659"),
                                                                                HtmlDoc.DocumentNode.SearchPropertiesSmart(IsMultiData, existItems, RootXPath, RootFormat, IsAttribute),
                                                                                crawTarget =>
            {
                // Only the candidate is recorded; its data is not extracted at this point.
                crawTargets.Add(crawTarget);
            }, d =>
            {
                isBusy = false;
                if (crawTargets.Count == 0)
                {
                    XLogSys.Print.Warn(GlobalHelper.Get("key_660"));
                    return;
                }

                var luckModel    = new FeelLuckyModel(crawTargets, HtmlDoc, IsMultiData);
                var view         = PluginProvider.GetObjectInstance<ICustomView>(GlobalHelper.Get("key_657")) as UserControl;
                view.DataContext = luckModel;

                var name   = GlobalHelper.Get("feellucky");
                var window = new Window {
                    Title = name
                };
                window.WindowState = WindowState.Maximized;
                window.Content     = view;
                luckModel.SetView(view, window);
                window.Activate();
                window.ShowDialog();
                if (window.DialogResult == true)
                {
                    var crawTarget = luckModel.CurrentTarget;

                    // When no root was configured before, the root format is reset to XPath.
                    if (string.IsNullOrEmpty(RootXPath))
                    {
                        RootFormat = SelectorFormat.XPath;
                    }
                    RootXPath = crawTarget.RootXPath;

                    CrawlItems.AddRange(crawTarget.CrawItems.Where(r => r.IsEnabled && CrawlItems.FirstOrDefault(d2 => d2.XPath == r.XPath) == null));
                }
            });

            SysProcessManager.CurrentProcessTasks.Add(task);
        }
Exemple #15
0
        /// <summary>
        /// Steps through the candidate crawl targets one at a time in an editable list window.
        /// On closing, the user can accept the current candidate (Yes), move on to the next one (No)
        /// or give up (Cancel).
        /// </summary>
        public void GreatHand()
        {
            var count              = 0;
            var crawTargets        = HtmlDoc.SearchPropertiesSmart(CrawlItems, RootXPath, IsAttribute);
            var currentCrawTargets = crawTargets.GetEnumerator();
            var result             = currentCrawTargets.MoveNext();

            if (result)
            {
                CrawTarget = currentCrawTargets.Current;
            }
            else
            {
                CrawTarget = null;
                XLogSys.Print.Warn("没有检查到任何可选的列表页面");
                return;
            }

            var crawitems = CrawTarget.CrawItems;
            var datas     = HtmlDoc.GetDataFromXPath(crawitems, IsMultiData, CrawTarget.RootXPath);

            // First row maps every property name to itself; edited values become the new names.
            var propertyNames = new FreeDocument(datas.GetKeys().ToDictionary(d => d, d => (object)d));

            datas.Insert(0, propertyNames);
            var view = PluginProvider.GetObjectInstance<IDataViewer>("可编辑列表");
            var r    = view.SetCurrentView(datas);

            var name   = "手气不错_可修改第一列的属性名称";
            var window = new Window {
                Title = name
            };

            window.Content  = r;
            window.Closing += (s, e) =>
            {
                var check = MessageBox.Show("是否确认选择当前结果?【是】:确认结果,  【否】:检查下个目标,  【取消】:结束当前手气不错", "提示信息",
                                            MessageBoxButton.YesNoCancel);
                switch (check)
                {
                case MessageBoxResult.Yes:
                {
                    // Snapshot the original names first: looking items up by their live Name while
                    // renaming would let an earlier rename (a -> b) capture the later lookup of "b".
                    var originals = crawitems.Select(d => new { Item = d, OriginalName = d.Name }).ToList();
                    foreach (var propertyName in propertyNames)
                    {
                        if (propertyName.Value == null)
                        {
                            continue;
                        }
                        var match = originals.FirstOrDefault(o => o.OriginalName == propertyName.Key);
                        if (match == null)
                        {
                            continue;
                        }
                        match.Item.Name = propertyName.Value.ToString();
                    }
                    CrawlItems.Clear();
                    RootXPath = CrawTarget.RootXPath;
                    CrawlItems.AddRange(crawitems);
                    currentCrawTargets = null;
                    break;
                }

                case MessageBoxResult.No:
                {
                    // Keep the window open and show the next candidate instead.
                    e.Cancel = true;
                    result   = currentCrawTargets.MoveNext();
                    count++;
                    if (result)
                    {
                        CrawTarget = currentCrawTargets.Current;
                    }
                    else
                    {
                        // Candidates exhausted: restart the search from the beginning.
                        MessageBox.Show("已经搜索所有可能情况,搜索器已经返回开头");
                        crawTargets        = HtmlDoc.SearchPropertiesSmart(CrawlItems, RootXPath, IsAttribute);
                        currentCrawTargets = crawTargets.GetEnumerator();
                        count  = 0;
                        result = currentCrawTargets.MoveNext();
                        if (!result)
                        {
                            // Nothing to show after the restart either; let the window close.
                            e.Cancel = false;
                        }
                        else
                        {
                            CrawTarget = currentCrawTargets.Current;
                        }
                    }

                    // Refresh the view for whichever target is current now.
                    crawitems = CrawTarget.CrawItems;
                    var title = $"手气不错,第{count}次结果";
                    datas         = HtmlDoc.GetDataFromXPath(crawitems, IsMultiData, CrawTarget.RootXPath);
                    propertyNames = new FreeDocument(datas.GetKeys().ToDictionary(d => d, d => (object)d));
                    datas.Insert(0, propertyNames);
                    r = view.SetCurrentView(datas);
                    window.Content = r;
                    window.Title   = title;
                    break;
                }

                case MessageBoxResult.Cancel:
                    return;
                }
            };

            window.ShowDialog();
        }
Exemple #16
0
        /// <summary>
        /// Adds a crawl item for the current selection. When RootXPath is set and the search format
        /// is XPath, the stored path is made relative to the parent of the root match and any
        /// attribute selector found in the original path is appended again.
        /// </summary>
        /// <param name="isAlert">
        /// When true, the user is asked to confirm adding a node that is not a descendant of the
        /// root's parent.
        /// </param>
        private void AddNewItem(bool isAlert = true)
        {
            var path     = SelectXPath;
            var rootPath = RootXPath;

            if (!string.IsNullOrEmpty(rootPath))
            {
                // The query is wrapped because a malformed selector is expected to throw;
                // the failure is logged and then handled by the null check below.
                HtmlNode rootMatch = null;
                try
                {
                    rootMatch = HtmlDoc.DocumentNode.SelectSingleNodePlus(rootPath, RootFormat);
                }
                catch (Exception)
                {
                    XLogSys.Print.Error($"{RootXPath}  不能被识别为正确的{RootFormat}表达式,请检查");
                }
                if (!(rootMatch != null).SafeCheck($"使用当前父节点{RootFormat} {RootXPath},在文档中找不到任何父节点"))
                {
                    return;
                }

                // Reuse the node found above instead of evaluating the selector again. A match
                // without a parent cannot anchor a relative path, so it is reported the same way.
                var root = rootMatch.ParentNode;
                if (!(root != null).SafeCheck($"使用当前父节点{RootFormat} {RootXPath},在文档中找不到任何父节点"))
                {
                    return;
                }

                HtmlNode node = null;
                if (
                    !ControlExtended.SafeInvoke(() => HtmlDoc.DocumentNode.SelectSingleNodePlus(path, SearchFormat),
                                                ref node,
                                                LogType.Info, "检查子节点XPath正确性", true))
                {
                    return;
                }
                if (!(node != null).SafeCheck("使用当前子节点XPath,在文档中找不到任何子节点"))
                {
                    return;
                }

                if (!node.IsAncestor(root) && isAlert)
                {
                    if (
                        MessageBox.Show("当前XPath所在节点不是父节点的后代,请检查对应的XPath,是否依然要添加?", "提示信息", MessageBoxButton.YesNo) ==
                        MessageBoxResult.No)
                    {
                        return;
                    }
                }

                // attrValue is required by the out signature but not used here.
                string attr      = "";
                string attrValue = "";
                XPathAnalyzer.GetAttribute(path, out attr, out attrValue);
                if (SearchFormat == SelectorFormat.XPath)
                {
                    path = XPath.TakeOffPlus(node.XPath, root.XPath);
                    if (attr != "")
                    {
                        path += "/@" + attr + "[1]";
                    }
                }
            }

            // A name clash is allowed only after explicit confirmation.
            var nameTaken = CrawlItems.Any(d => d.Name == SelectName);
            if (nameTaken &&
                MessageBox.Show("已经存在同名的属性,是否依然添加?", "提示信息", MessageBoxButton.OKCancel) != MessageBoxResult.OK)
            {
                return;
            }

            var item = new CrawlItem {
                XPath = path, Name = SelectName, SampleData1 = SelectText
            };
            item.Format = SearchFormat;
            CrawlItems.Add(item);

            SelectXPath = "";
            SelectName  = "";

            XLogSys.Print.Info("成功添加属性");
        }