/// <summary>
/// Builds a crawl item from the current selection (SelectXPath, SelectName, SelectText)
/// and appends it to CrawlItems.
/// </summary>
/// <remarks>
/// When RootXPath is set, the selected node has to pass the IsAncestor check against the
/// parent of the root node; the stored XPath is then reduced with XPath.TakeOff against
/// that parent's XPath.
/// </remarks>
/// <param name="isAlert">When true, problems are reported to the user through a message box.</param>
private void AddNewItem(bool isAlert = true)
{
    var path = SelectXPath;
    if (!string.IsNullOrEmpty(RootXPath))
    {
        // SelectSingleNode yields null when nothing matches, so neither the root lookup
        // nor the node lookup may be dereferenced blindly.
        var root = HtmlDoc.DocumentNode.SelectSingleNode(RootXPath)?.ParentNode;
        var node = HtmlDoc.DocumentNode.SelectSingleNode(path);
        if (root == null || node == null || !node.IsAncestor(root))
        {
            if (isAlert)
            {
                MessageBox.Show("当前XPath所在节点不是父节点的后代,请检查对应的XPath");
            }
            return;
        }
        path = new XPath(node.XPath).TakeOff(root.XPath).ToString();
    }

    var item = new CrawlItem { XPath = path, Name = SelectName, SampleData1 = SelectText };
    if (CrawlItems.Any(d => d.Name == SelectName))
    {
        // Remember the clashing name before SelectName is replaced by a generated one,
        // so the message reports the name that actually already exists.
        var duplicateName = SelectName;
        SelectName = "属性" + CrawlItems.Count;
        if (isAlert)
        {
            MessageBox.Show($"已存在名称为{duplicateName}的属性,不能重复添加");
            return;
        }
    }
    CrawlItems.Add(item);
    SelectXPath = "";
}
/// <summary>
/// Restores this object's settings and crawl items from the given dictionary.
/// </summary>
/// <param name="dicts">
/// Source key/value pairs. When the instance is a FreeDocument with Children, every child
/// is deserialized into a new CrawlItem that is appended to CrawlItems.
/// </param>
/// <param name="scenario">Scenario forwarded to the base implementation.</param>
public override void DictDeserialize(IDictionary<string, object> dicts, Scenario scenario = Scenario.Database)
{
    base.DictDeserialize(dicts, scenario);

    // Each Set call receives the current value as its second argument
    // (presumably used as the fallback when the key is missing - confirm against Set).
    URL = dicts.Set("URL", URL);
    RootXPath = dicts.Set("RootXPath", RootXPath);
    Remark = dicts.Set("Remark", Remark);
    RootFormat = dicts.Set("RootFormat", RootFormat);
    ShareCookie.SelectItem = dicts.Set("ShareCookie", ShareCookie.SelectItem);
    IsMultiData = dicts.Set("IsMultiData", IsMultiData);
    IsSuperMode = dicts.Set("IsSuperMode", IsSuperMode);

    // Single lookup instead of ContainsKey + indexer; the nested settings are only
    // forwarded when the stored value really is a dictionary.
    object httpSet;
    if (dicts.TryGetValue("HttpSet", out httpSet))
    {
        var httpSettings = httpSet as IDictionary<string, object>;
        if (httpSettings != null)
        {
            Http.UnsafeDictDeserialize(httpSettings);
        }
    }

    // The "Generator" entry used to be read and cast but never used; that dead code was dropped.

    var doc = dicts as FreeDocument;
    if (doc?.Children != null)
    {
        foreach (var child in doc.Children)
        {
            var item = new CrawlItem();
            item.DictDeserialize(child);
            CrawlItems.Add(item);
        }
    }
}
/// <summary>
/// Shows a modal, maximized editor window for the current crawl items and, when the dialog
/// is confirmed, replaces RootXPath and CrawlItems with the edited target.
/// </summary>
public void EditProperty()
{
    // The dialog works on clones, so the live items stay untouched until the user confirms.
    var editableTarget = new XPathAnalyzer.CrawTarget(CrawlItems.Select(d => d.Clone()).ToList(), RootXPath, RootFormat)
    {
        RootNode = this.HtmlDoc.DocumentNode,
        WorkMode = IsMultiData
    };
    var targets = new List<XPathAnalyzer.CrawTarget> { editableTarget };

    var model = new FeelLuckyModel(targets, HtmlDoc, IsMultiData) { CanChange = false };

    var panel = PluginProvider.GetObjectInstance<ICustomView>(GlobalHelper.Get("key_657")) as UserControl;
    panel.DataContext = model;

    var dialog = new Window
    {
        Title = GlobalHelper.Get("key_658"),
        WindowState = WindowState.Maximized,
        Content = panel
    };
    model.SetView(panel, dialog);
    dialog.Activate();
    dialog.ShowDialog();

    if (dialog.DialogResult != true)
    {
        return;
    }

    CrawlItems.Clear();
    RootXPath = model.CurrentTarget.RootXPath;
    CrawlItems.AddRange(model.CurrentTarget.CrawItems);
}
/// <summary>
/// Restores this object's settings and crawl items from the given dictionary.
/// </summary>
/// <param name="dicts">
/// Source key/value pairs. When the instance is a FreeDocument with Children, every child
/// is deserialized into a new CrawlItem that is appended to CrawlItems.
/// </param>
/// <param name="scenario">Scenario forwarded to the base implementation.</param>
public override void DictDeserialize(IDictionary<string, object> dicts, Scenario scenario = Scenario.Database)
{
    base.DictDeserialize(dicts, scenario);

    // Each Set call receives the current value as its second argument
    // (presumably used as the fallback when the key is missing - confirm against Set).
    URL = dicts.Set("URL", URL);
    RootXPath = dicts.Set("RootXPath", RootXPath);
    IsMultiData = dicts.Set("IsMultiData", IsMultiData);
    IsJson2xml = dicts.Set("IsJson2xml", IsJson2xml);
    Crawler = dicts.Set("Crawler", Crawler);

    // Single lookup instead of ContainsKey + indexer; the nested settings are only
    // forwarded when the stored value really is a dictionary.
    object httpSet;
    if (dicts.TryGetValue("HttpSet", out httpSet))
    {
        var httpSettings = httpSet as IDictionary<string, object>;
        if (httpSettings != null)
        {
            Http.UnsafeDictDeserialize(httpSettings);
        }
    }

    // The "Generator" entry used to be read and cast but never used; that dead code was dropped.

    var doc = dicts as FreeDocument;
    if (doc?.Children != null)
    {
        foreach (var child in doc.Children)
        {
            var item = new CrawlItem();
            item.DictDeserialize(child);
            CrawlItems.Add(item);
        }
    }
}
/// <summary>
/// Creates a crawl item from the current selection (SelectXPath, SelectName, SelectText)
/// and appends it to CrawlItems.
/// </summary>
/// <remarks>
/// With a RootXPath set, both the root and the selected node must be found in the document;
/// the stored path is then the result of XPath.TakeOff against the XPath of the root's parent.
/// A name that already exists is only added after the user confirms.
/// </remarks>
/// <param name="isAlert">When true, the user is asked before adding a node that fails the IsAncestor check.</param>
private void AddNewItem(bool isAlert = true)
{
    var path = SelectXPath;

    if (!string.IsNullOrEmpty(RootXPath))
    {
        // TODO: an invalid XPath expression raises an exception that needs to be caught.
        HtmlNode root = null;
        try
        {
            root = HtmlDoc.DocumentNode.SelectSingleNode(RootXPath);
        }
        catch (Exception)
        {
            XLogSys.Print.Error($"{RootXPath} 不能被识别为正确的XPath表达式,请检查");
        }

        var rootFound = root != null;
        if (!rootFound.SafeCheck("使用当前父节点XPath,在文档中找不到任何父节点"))
        {
            return;
        }

        // From here on "root" refers to the parent of the matched root node.
        root = HtmlDoc.DocumentNode.SelectSingleNode(RootXPath)?.ParentNode;

        HtmlNode node = null;
        var lookupSucceeded = ControlExtended.SafeInvoke(
            () => HtmlDoc.DocumentNode.SelectSingleNode(path),
            ref node,
            LogType.Info,
            "检查子节点XPath正确性",
            true);
        if (!lookupSucceeded)
        {
            return;
        }

        var nodeFound = node != null;
        if (!nodeFound.SafeCheck("使用当前子节点XPath,在文档中找不到任何子节点"))
        {
            return;
        }

        var passesAncestorCheck = node.IsAncestor(root);
        if (!passesAncestorCheck && isAlert)
        {
            var answer = MessageBox.Show(
                "当前XPath所在节点不是父节点的后代,请检查对应的XPath,是否依然要添加?",
                "提示信息",
                MessageBoxButton.YesNo);
            if (answer == MessageBoxResult.No)
            {
                return;
            }
        }

        path = XPath.TakeOff(node.XPath, root.XPath);
    }

    // A clashing name needs explicit confirmation; a unique name is added right away.
    var nameIsUnused = CrawlItems.FirstOrDefault(d => d.Name == SelectName) == null;
    if (!nameIsUnused
        && MessageBox.Show("已经存在同名的属性,是否依然添加?", "提示信息", MessageBoxButton.OKCancel) != MessageBoxResult.OK)
    {
        return;
    }

    CrawlItems.Add(new CrawlItem { XPath = path, Name = SelectName, SampleData1 = SelectText });
    SelectXPath = "";
    SelectName = "";
    XLogSys.Print.Info("成功添加属性");
}
// Disabled earlier implementation of AutoVisit, kept for reference.
//public void AutoVisit()
//{
//    if (Documents.Any())
//    {
//        var item = new HttpItem();
//        Documents[0].DictCopyTo(item);
//        var res = helper != null && helper.AutoVisit(item);
//        XLogSys.Print.Info("成功模拟登录");
//        Http.SetValue("Cookie", item.GetValue("Cookie"));
//        if (res)
//        {
//            URL = item.URL;
//        }
//    }
//}

/// <summary>
/// Takes the first result of SearchPropertiesSmart, shows the extracted data in an editable
/// list window and, if the user confirms while the window is closing, applies the edited
/// property names and replaces CrawlItems with that result.
/// </summary>
private void GreatHand()
{
    var foundItems = HtmlDoc.SearchPropertiesSmart(CrawlItems, IsAttribute).FirstOrDefault();
    if (!(foundItems != null).SafeCheck("网页属性获取", LogType.Info))
    {
        return;
    }

    var rows = HtmlDoc.GetDataFromXPath(foundItems, IsMultiData);

    // The first row maps every key to itself, so the names can be edited in the viewer.
    var headerRow = new FreeDocument(rows.GetKeys().ToDictionary(d => d, d => (object)d));
    rows.Insert(0, headerRow);

    var viewer = PluginProvider.GetObjectInstance<IDataViewer>("可编辑列表");
    var dialog = new Window
    {
        Title = "手气不错_可修改第一列的属性名称",
        Content = viewer.SetCurrentView(rows)
    };

    dialog.Closing += (s, e) =>
    {
        if (ControlExtended.UserCheck("是否确认选择当前的数据表") == false)
        {
            return;
        }

        foreach (var header in headerRow)
        {
            var target = foundItems.FirstOrDefault(d => d.Name == header.Key);
            if (target != null && header.Value != null)
            {
                target.Name = header.Value.ToString();
            }
        }

        CrawlItems.Clear();
        CrawlItems.AddRange(foundItems);
    };

    dialog.ShowDialog();
}
/// <summary>
/// Builds a crawl item from the current selection (SelectXPath, SelectName, SelectText)
/// and appends it to CrawlItems.
/// </summary>
/// <remarks>
/// With a RootXPath set, both the root and the selected node must be found in the document.
/// The stored path is reduced with XPath.TakeOff against the XPath of the root's parent
/// whenever the node passes the IsAncestor check, or the user explicitly confirmed adding a
/// node that does not.
/// </remarks>
/// <param name="isAlert">When true, problems are reported to / confirmed by the user through message boxes.</param>
private void AddNewItem(bool isAlert = true)
{
    var path = SelectXPath;
    if (!string.IsNullOrEmpty(RootXPath))
    {
        // TODO: an invalid XPath expression raises an exception that needs to be caught.
        var rootNode = HtmlDoc.DocumentNode.SelectSingleNode(RootXPath);
        if (!(rootNode != null).SafeCheck("使用当前父节点XPath,在文档中找不到任何父节点"))
        {
            return;
        }

        // The already matched node is reused instead of evaluating RootXPath a second time.
        var root = rootNode.ParentNode;

        var node = HtmlDoc.DocumentNode.SelectSingleNode(path);
        if (!(node != null).SafeCheck("使用当前子节点XPath,在文档中找不到任何子节点"))
        {
            return;
        }

        var passesAncestorCheck = node.IsAncestor(root);
        if (!passesAncestorCheck && isAlert)
        {
            var answer = MessageBox.Show(
                "当前XPath所在节点不是父节点的后代,请检查对应的XPath,是否依然要添加?",
                "提示信息",
                MessageBoxButton.YesNo);
            if (answer != MessageBoxResult.Yes)
            {
                return;
            }
        }

        // Previously the path was only made relative in the "not a descendant but confirmed"
        // case, leaving regular descendants with an absolute XPath. A node that fails the
        // check without confirmation (isAlert == false) keeps its path unchanged as before.
        if (root != null && (passesAncestorCheck || isAlert))
        {
            path = new XPath(node.XPath).TakeOff(root.XPath).ToString();
        }
    }

    var item = new CrawlItem { XPath = path, Name = SelectName, SampleData1 = SelectText };
    if (CrawlItems.Any(d => d.Name == SelectName))
    {
        // Remember the clashing name before SelectName is replaced by a generated one,
        // so the message reports the name that actually already exists.
        var duplicateName = SelectName;
        SelectName = "属性" + CrawlItems.Count;
        if (isAlert)
        {
            MessageBox.Show($"已存在名称为{duplicateName}的属性,不能重复添加");
            return;
        }
    }
    CrawlItems.Add(item);
    SelectXPath = "";
}
/// <summary>
/// Serializes this object: the base document is extended with the URL, root XPath, mode
/// flags and HTTP settings, and every crawl item is stored as a child document.
/// </summary>
/// <param name="scenario">Scenario forwarded to the base implementation and to every crawl item.</param>
/// <returns>The document produced by the base implementation with the additional entries and children.</returns>
public override FreeDocument DictSerialize(Scenario scenario = Scenario.Database)
{
    var serialized = base.DictSerialize(scenario);

    serialized.Add("URL", URL);
    serialized.Add("RootXPath", RootXPath);
    serialized.Add("IsMultiData", IsMultiData);
    serialized.Add("IsSuperMode", IsSuperMode);
    serialized.Add("HttpSet", Http.DictSerialize());

    // Children start from a fresh list, then receive one document per crawl item.
    serialized.Children = new List<FreeDocument>();
    var itemDocuments = CrawlItems.Select(d => d.DictSerialize(scenario));
    serialized.Children.AddRange(itemDocuments);

    return serialized;
}
/// <summary>
/// Searches the document for XPaths matching the selected text and replaces CrawlItems with
/// one item per hit, using the inner text of the matched node as sample data.
/// Does nothing when the selected text is null, empty or whitespace.
/// </summary>
private void Search()
{
    if (string.IsNullOrWhiteSpace(selectText))
    {
        return;
    }

    // The hits are materialized before the existing items are cleared.
    var matches = HtmlDoc.SearchXPath(SelectText, () => true).ToList();
    CrawlItems.Clear();
    matches.Execute(xpath => CrawlItems.Add(
        new CrawlItem
        {
            XPath = xpath,
            SampleData1 = HtmlDoc.DocumentNode.SelectSingleNodePlus(xpath, SelectorFormat.XPath).InnerText
        }));
}
/// <summary>
/// Serializes this object: the base document is extended with the URL, root XPath, filters,
/// crawler and HTTP settings; the first entry of Documents (if any) is stored under "Login",
/// and every crawl item becomes a child document.
/// </summary>
/// <param name="scenario">Scenario forwarded to the base implementation and to every crawl item.</param>
/// <returns>The document produced by the base implementation with the additional entries and children.</returns>
public override FreeDocument DictSerialize(Scenario scenario = Scenario.Database)
{
    var serialized = base.DictSerialize(scenario);

    serialized.Add("URL", URL);
    serialized.Add("RootXPath", RootXPath);
    serialized.Add("IsMultiData", IsMultiData);
    serialized.Add("HttpSet", Http.DictSerialize());
    serialized.Add("URLFilter", URLFilter);
    serialized.Add("ContentFilter", ContentFilter);
    serialized.Add("Crawler", Crawler);

    serialized.Children = new List<FreeDocument>();

    var hasLoginDocument = Documents.Any();
    if (hasLoginDocument)
    {
        serialized.Add("Login", Documents[0].DictSerialize());
    }

    var itemDocuments = CrawlItems.Select(d => d.DictSerialize(scenario));
    serialized.Children.AddRange(itemDocuments);

    return serialized;
}
/// <summary>
/// Registers a temporary task that collects the candidate targets produced by
/// SearchPropertiesSmart together with their extracted data. The task's final callback lets
/// the user pick one candidate in a modal, maximized window; on confirmation RootXPath and
/// CrawlItems are replaced with the enabled items of the chosen candidate.
/// </summary>
public void FeelLucky()
{
    isBusy = true;
    var candidates = new List<XPathAnalyzer.CrawTarget>();

    var task = TemporaryTask.AddTempTask(
        "网页结构计算中",
        HtmlDoc.SearchPropertiesSmart(CrawlItems, RootXPath, IsAttribute),
        candidate =>
        {
            // Per candidate: remember it and attach the data its XPaths extract.
            candidates.Add(candidate);
            candidate.Datas = HtmlDoc.GetDataFromXPath(candidate.CrawItems, IsMultiData, candidate.RootXPath);
        },
        d =>
        {
            isBusy = false;
            if (candidates.Count == 0)
            {
                CrawTarget = null;
                XLogSys.Print.Warn("没有检查到任何可选的列表页面");
                return;
            }

            var model = new FeelLuckyModel(candidates, HtmlDoc);
            var panel = PluginProvider.GetObjectInstance<ICustomView>("手气不错面板") as UserControl;
            panel.DataContext = model;

            var dialog = new Window
            {
                Title = "手气不错",
                WindowState = WindowState.Maximized,
                Content = panel
            };
            model.SetView(panel, dialog);
            dialog.Activate();
            dialog.ShowDialog();

            if (dialog.DialogResult != true)
            {
                return;
            }

            var chosen = model.CurrentTarget;
            RootXPath = chosen.RootXPath;
            CrawlItems.Clear();
            CrawlItems.AddRange(chosen.CrawItems.Where(r => r.IsEnabled));
        });

    SysProcessManager.CurrentProcessTasks.Add(task);
}
/// <summary>
/// Builds a crawl item from the current selection (SelectXPath, SelectName, SelectText)
/// and appends it to CrawlItems.
/// </summary>
/// <remarks>
/// When an item with the same name already exists, SelectName is replaced by a generated
/// name ("属性" followed by the current item count). With <paramref name="isAlert"/> set the
/// user is informed and nothing is added; otherwise the item is still added.
/// </remarks>
/// <param name="isAlert">When true, a name clash is reported through a message box and aborts the add.</param>
private void AddNewItem(bool isAlert = true)
{
    var item = new CrawlItem { XPath = SelectXPath, Name = SelectName, SampleData1 = SelectText };
    if (CrawlItems.Any(d => d.Name == SelectName))
    {
        // Remember the clashing name before SelectName is replaced by a generated one,
        // so the message reports the name that actually already exists.
        var duplicateName = SelectName;
        SelectName = "属性" + CrawlItems.Count;
        if (isAlert)
        {
            MessageBox.Show($"已存在名称为{duplicateName}的属性,不能重复添加");
            return;
        }
    }
    CrawlItems.Add(item);
    SelectXPath = "";
}
/// <summary>
/// Creates a crawl item from the current selection (SelectXPath, SelectName, SelectText,
/// SearchFormat) and appends it to CrawlItems.
/// </summary>
/// <remarks>
/// With a RootXPath set, both the root (looked up with RootFormat) and the selected node
/// (looked up with SearchFormat) must be found. For the XPath search format the stored path
/// becomes the result of XPath.TakeOffPlus against the XPath of the root's parent, with the
/// attribute reported by XPathAnalyzer.GetAttribute re-appended when it is not empty.
/// A name that already exists is only added after the user confirms.
/// </remarks>
/// <param name="isAlert">When true, the user is asked before adding a node that fails the IsAncestor check.</param>
private void AddNewItem(bool isAlert = true)
{
    var path = SelectXPath;
    var rootPath = RootXPath;

    if (!string.IsNullOrEmpty(rootPath))
    {
        // TODO: an invalid XPath expression raises an exception that needs to be caught.
        HtmlNode root = null;
        try
        {
            root = HtmlDoc.DocumentNode.SelectSingleNodePlus(rootPath, RootFormat);
        }
        catch (Exception)
        {
            var invalidRootMessage = string.Format(GlobalHelper.Get("key_662"), RootXPath, RootFormat);
            XLogSys.Print.Error(invalidRootMessage);
        }

        var rootFound = root != null;
        if (!rootFound.SafeCheck(string.Format(GlobalHelper.Get("key_663"), RootFormat, RootXPath)))
        {
            return;
        }

        // From here on "root" refers to the parent of the matched root node.
        root = HtmlDoc.DocumentNode.SelectSingleNodePlus(rootPath, RootFormat)?.ParentNode;

        HtmlNode node = null;
        var lookupSucceeded = ControlExtended.SafeInvoke(
            () => HtmlDoc.DocumentNode.SelectSingleNodePlus(path, SearchFormat),
            ref node,
            LogType.Info,
            GlobalHelper.Get("key_664"),
            true);
        if (!lookupSucceeded)
        {
            return;
        }

        var nodeFound = node != null;
        if (!nodeFound.SafeCheck(GlobalHelper.Get("key_665")))
        {
            return;
        }

        var passesAncestorCheck = node.IsAncestor(root);
        if (!passesAncestorCheck && isAlert)
        {
            var answer = MessageBox.Show(GlobalHelper.Get("key_666"), GlobalHelper.Get("key_99"), MessageBoxButton.YesNo);
            if (answer == MessageBoxResult.No)
            {
                return;
            }
        }

        // The attribute is read from the original path before the path is rewritten.
        string attr;
        string attrValue;
        XPathAnalyzer.GetAttribute(path, out attr, out attrValue);

        if (SearchFormat == SelectorFormat.XPath)
        {
            path = XPath.TakeOffPlus(node.XPath, root.XPath);
            if (attr != "")
            {
                path = string.Concat(path, "/@", attr, "[1]");
            }
        }
    }

    // A clashing name needs explicit confirmation; a unique name is added right away.
    var nameIsUnused = CrawlItems.FirstOrDefault(d => d.Name == SelectName) == null;
    if (!nameIsUnused
        && MessageBox.Show(GlobalHelper.Get("add_column_sure"), GlobalHelper.Get("key_99"), MessageBoxButton.OKCancel) != MessageBoxResult.OK)
    {
        return;
    }

    var item = new CrawlItem
    {
        XPath = path,
        Name = SelectName,
        SampleData1 = SelectText,
        Format = SearchFormat
    };
    CrawlItems.Add(item);
    SelectXPath = "";
    SelectName = "";
    XLogSys.Print.Info(GlobalHelper.Get("key_668"));
}
/// <summary>
/// Registers a temporary task that collects the candidate targets produced by
/// SearchPropertiesSmart. The task's final callback lets the user pick one candidate in a
/// modal, maximized window; on confirmation RootXPath is taken from the chosen candidate and
/// its enabled items whose XPath is not yet present are appended to CrawlItems.
/// </summary>
/// <remarks>
/// NOTE(review): VisitUrlAsync is invoked without waiting for its result when URLHTML is
/// empty - confirm the search does not depend on the page being loaded at that point.
/// </remarks>
public void FeelLucky()
{
    if (string.IsNullOrEmpty(this.URLHTML))
    {
        this.VisitUrlAsync();
    }

    isBusy = true;
    var candidates = new List<XPathAnalyzer.CrawTarget>();

    // In single-item mode the search is seeded with one temporary item for the selected
    // XPath instead of the existing crawl items.
    ICollection<CrawlItem> existItems = CrawlItems;
    if (IsMultiData == ScriptWorkMode.One)
    {
        var seed = new CrawlItem { Name = "temp", XPath = SelectXPath };
        existItems = new List<CrawlItem> { seed };
    }

    var task = TemporaryTask<XPathAnalyzer.CrawTarget>.AddTempTaskSimple(
        GlobalHelper.Get("key_659"),
        HtmlDoc.DocumentNode.SearchPropertiesSmart(IsMultiData, existItems, RootXPath, RootFormat, IsAttribute),
        candidate =>
        {
            candidates.Add(candidate);
            // Data extraction per candidate is currently disabled:
            //var datas =
            //    HtmlDoc.DocumentNode.GetDataFromXPath(crawTarget.CrawItems, IsMultiData, crawTarget.RootXPath,
            //        RootFormat).ToList();
            //crawTarget.Datas = datas;
        },
        d =>
        {
            isBusy = false;
            if (candidates.Count == 0)
            {
                XLogSys.Print.Warn(GlobalHelper.Get("key_660"));
                return;
            }

            var model = new FeelLuckyModel(candidates, HtmlDoc, IsMultiData);
            var panel = PluginProvider.GetObjectInstance<ICustomView>(GlobalHelper.Get("key_657")) as UserControl;
            panel.DataContext = model;

            var dialog = new Window
            {
                Title = GlobalHelper.Get("feellucky"),
                WindowState = WindowState.Maximized,
                Content = panel
            };
            model.SetView(panel, dialog);
            dialog.Activate();
            dialog.ShowDialog();

            if (dialog.DialogResult != true)
            {
                return;
            }

            var chosen = model.CurrentTarget;
            if (string.IsNullOrEmpty(RootXPath))
            {
                RootFormat = SelectorFormat.XPath;
            }
            RootXPath = chosen.RootXPath;

            // Deliberately left as a lazy query: the filter looks at CrawlItems while items
            // are being appended, so it also sees the items added earlier in this call.
            var newItems = chosen.CrawItems.Where(
                r => r.IsEnabled && CrawlItems.FirstOrDefault(d2 => d2.XPath == r.XPath) == null);
            CrawlItems.AddRange(newItems);
        });

    SysProcessManager.CurrentProcessTasks.Add(task);
}
/// <summary>
/// Steps through the candidate targets produced by SearchPropertiesSmart one at a time,
/// showing the data extracted for the current candidate in an editable list window.
/// Closing the window asks the user whether to accept the current candidate, move on to
/// the next one, or stop.
/// </summary>
public void GreatHand()
{
    // Number of times the user asked for another candidate; shown in the window title.
    var count = 0;
    var crawTargets = HtmlDoc.SearchPropertiesSmart(CrawlItems, RootXPath, IsAttribute);
    var currentCrawTargets = crawTargets.GetEnumerator();
    var result = currentCrawTargets.MoveNext();
    if (result)
    {
        CrawTarget = currentCrawTargets.Current;
    }
    else
    {
        // No candidate at all: reset the current target, log a warning and stop.
        CrawTarget = null;
        XLogSys.Print.Warn("没有检查到任何可选的列表页面");
        return;
    }
    var crawitems = CrawTarget.CrawItems;
    var datas = HtmlDoc.GetDataFromXPath(crawitems, IsMultiData, CrawTarget.RootXPath);
    // Row inserted at index 0 that maps every key to itself, so the names can be edited in the view.
    var propertyNames = new FreeDocument(datas.GetKeys().ToDictionary(d => d, d => (object)d));
    datas.Insert(0, propertyNames);
    var view = PluginProvider.GetObjectInstance <IDataViewer>("可编辑列表");
    var r = view.SetCurrentView(datas);
    var name = "手气不错_可修改第一列的属性名称";
    var window = new Window { Title = name };
    window.Content = r;
    // The handler below reads and reassigns the locals captured above (crawitems, datas,
    // propertyNames, r, count, result, crawTargets, currentCrawTargets).
    window.Closing += (s, e) =>
    {
        var check = MessageBox.Show("是否确认选择当前结果?【是】:确认结果, 【否】:检查下个目标, 【取消】:结束当前手气不错", "提示信息", MessageBoxButton.YesNoCancel);
        switch (check)
        {
            case MessageBoxResult.Yes:
                // Accept: copy the (possibly edited) names from the first row onto the items
                // with the matching original name, then replace the stored items and root XPath.
                foreach (var propertyName in propertyNames)
                {
                    var item = crawitems.FirstOrDefault(d => d.Name == propertyName.Key);
                    if (item == null)
                    {
                        continue;
                    }
                    if (propertyName.Value == null)
                    {
                        continue;
                    }
                    item.Name = propertyName.Value.ToString();
                }
                CrawlItems.Clear();
                RootXPath = CrawTarget.RootXPath;
                CrawlItems.AddRange(crawitems);
                currentCrawTargets = null;
                break;
            case MessageBoxResult.No:
                // Next candidate: cancel the close so the window stays open, then advance.
                e.Cancel = true;
                result = currentCrawTargets.MoveNext();
                count++;
                if (result)
                {
                    CrawTarget = currentCrawTargets.Current;
                }
                else
                {
                    // Candidates exhausted: tell the user and restart the search from the beginning.
                    MessageBox.Show("已经搜索所有可能情况,搜索器已经返回开头");
                    crawTargets = HtmlDoc.SearchPropertiesSmart(CrawlItems, RootXPath, IsAttribute);
                    currentCrawTargets = crawTargets.GetEnumerator();
                    count = 0;
                    result = currentCrawTargets.MoveNext();
                    if (!result)
                    {
                        // The restarted search has no candidate either: allow the window to close.
                        // CrawTarget is left unchanged here, so the refresh below reuses the previous target.
                        e.Cancel = false;
                    }
                    else
                    {
                        CrawTarget = currentCrawTargets.Current;
                    }
                }
                // Rebuild the data, the name row and the view for the target now in CrawTarget.
                crawitems = CrawTarget.CrawItems;
                var title = $"手气不错,第{count}次结果";
                datas = HtmlDoc.GetDataFromXPath(crawitems, IsMultiData, CrawTarget.RootXPath);
                propertyNames = new FreeDocument(datas.GetKeys().ToDictionary(d => d, d => (object)d));
                datas.Insert(0, propertyNames);
                r = view.SetCurrentView(datas);
                window.Content = r;
                window.Title = title;
                break;
            case MessageBoxResult.Cancel:
                // Stop: leave without changes; e.Cancel is not set, so the close proceeds.
                return;
        }
    };
    window.ShowDialog();
}
/// <summary>
/// Creates a crawl item from the current selection (SelectXPath, SelectName, SelectText,
/// SearchFormat) and appends it to CrawlItems.
/// </summary>
/// <remarks>
/// With a RootXPath set, both the root (looked up with RootFormat) and the selected node
/// (looked up with SearchFormat) must be found. For the XPath search format the stored path
/// becomes the result of XPath.TakeOffPlus against the XPath of the root's parent, with the
/// attribute reported by XPathAnalyzer.GetAttribute re-appended when it is not empty.
/// A name that already exists is only added after the user confirms.
/// </remarks>
/// <param name="isAlert">When true, the user is asked before adding a node that fails the IsAncestor check.</param>
private void AddNewItem(bool isAlert = true)
{
    var path = SelectXPath;
    var rootPath = RootXPath;

    var hasRoot = !string.IsNullOrEmpty(rootPath);
    if (hasRoot)
    {
        // TODO: an invalid XPath expression raises an exception that needs to be caught.
        HtmlNode root = null;
        try
        {
            root = HtmlDoc.DocumentNode.SelectSingleNodePlus(rootPath, RootFormat);
        }
        catch (Exception)
        {
            XLogSys.Print.Error($"{RootXPath} 不能被识别为正确的{RootFormat}表达式,请检查");
        }

        if (!(root != null).SafeCheck($"使用当前父节点{RootFormat} {RootXPath},在文档中找不到任何父节点"))
        {
            return;
        }

        // Switch from the matched root node to its parent; the relative path is taken against it.
        root = HtmlDoc.DocumentNode.SelectSingleNodePlus(rootPath, RootFormat)?.ParentNode;

        HtmlNode node = null;
        var selected = ControlExtended.SafeInvoke(
            () => HtmlDoc.DocumentNode.SelectSingleNodePlus(path, SearchFormat),
            ref node,
            LogType.Info,
            "检查子节点XPath正确性",
            true);
        if (!selected)
        {
            return;
        }

        if (!(node != null).SafeCheck("使用当前子节点XPath,在文档中找不到任何子节点"))
        {
            return;
        }

        if (!node.IsAncestor(root) && isAlert)
        {
            var choice = MessageBox.Show(
                "当前XPath所在节点不是父节点的后代,请检查对应的XPath,是否依然要添加?",
                "提示信息",
                MessageBoxButton.YesNo);
            if (choice == MessageBoxResult.No)
            {
                return;
            }
        }

        // The attribute is read from the original path before the path is rewritten.
        string attr;
        string attrValue;
        XPathAnalyzer.GetAttribute(path, out attr, out attrValue);

        if (SearchFormat == SelectorFormat.XPath)
        {
            var relativePath = XPath.TakeOffPlus(node.XPath, root.XPath);
            path = attr != "" ? relativePath + "/@" + attr + "[1]" : relativePath;
        }
    }

    // Unique names are added directly; a clashing name requires the user's OK.
    var existing = CrawlItems.FirstOrDefault(d => d.Name == SelectName);
    var mayAdd = existing == null
                 || MessageBox.Show("已经存在同名的属性,是否依然添加?", "提示信息", MessageBoxButton.OKCancel) == MessageBoxResult.OK;
    if (!mayAdd)
    {
        return;
    }

    CrawlItems.Add(new CrawlItem
    {
        XPath = path,
        Name = SelectName,
        SampleData1 = SelectText,
        Format = SearchFormat
    });
    SelectXPath = "";
    SelectName = "";
    XLogSys.Print.Info("成功添加属性");
}