/// <summary>
/// Creates the XPath detector model for a set of HTML results.
/// </summary>
/// <param name="htmlResults">Results to work on; materialized into <c>HtmlResults</c>.</param>
/// <param name="workmode">
/// <c>ScriptWorkMode.List</c> sets <c>ChildCount</c> to 5; any other mode sets it to 1.
/// </param>
/// <param name="theView">Window kept in <c>view</c>.</param>
/// <param name="textbox">Text box kept in <c>htmlTextBox</c>.</param>
public XPathDetectorModel(IEnumerable<HtmlResult> htmlResults, ScriptWorkMode workmode, Window theView,
    TextBox textbox)
{
    HtmlDoc = new HtmlDocument();

    // Predefined XPath templates; only the values are handed to the selector below.
    var xpathTemplates = new Dictionary<string, string>
    {
        { "all_image", "//img[@src]" },
        { "all_item_with_id", @"//*[@id=""YOUR_ID""]" },
        { "all_item_with_class", @"//*[@class=""YOUR_CLASS""]" }
    };

    HtmlResults = htmlResults.ToList();
    view = theView;
    htmlTextBox = textbox;

    XPath = new TextEditSelector();
    XPath.SetSource(xpathTemplates.Select(template => template.Value));

    ChildCount = workmode == ScriptWorkMode.List ? 5 : 1;

    CrawlItems = new ObservableCollection<CrawlItem>();
    ChildItems = new ObservableCollection<CrawlItem>();

    // Assigned last, after every other member above has been initialized.
    SelectedResult = HtmlResults.FirstOrDefault();
}
/// <summary>
/// Creates a mapping pair whose two selectors are populated from the given key lists.
/// </summary>
/// <param name="source_keys">Keys passed to the <c>Source</c> selector.</param>
/// <param name="target_keys">Keys passed to the <c>Target</c> selector.</param>
public MappingPair(List<string> source_keys, List<string> target_keys)
{
    // Source side.
    Source = new TextEditSelector();
    Source.SetSource(source_keys);

    // Target side.
    Target = new TextEditSelector();
    Target.SetSource(target_keys);
}
/// <summary>
/// Initializes the ETL base: looks up the process manager plugin, wires the ETL selector
/// and sets the default range, column name and enabled flag.
/// </summary>
public ETLBase()
{
    // The "as" cast leaves processManager null when the registered plugin is not an IProcessManager.
    processManager = MainDescription.MainFrm.PluginDictionary["模块管理"] as IProcessManager;

    var etlSelector = new TextEditSelector();
    etlSelector.GetItems = this.GetAllETLNames();
    ETLSelector = etlSelector;

    ETLRange = "";
    Column = "column";
    Enabled = true;
}
/// <summary>
/// Initializes the database executor: looks up the data manager plugin, exposes its current
/// connectors through <c>ConnectorSelector</c>, and refreshes <c>TableNames</c> whenever the
/// selected connector changes.
/// </summary>
public DbEX()
{
    // The "as" cast leaves dataManager null when the registered plugin is not an IDataManager.
    dataManager = MainDescription.MainFrm.PluginDictionary["DataManager"] as IDataManager;

    ConnectorSelector = new ExtendSelector<IDataBaseConnector>();
    ConnectorSelector.GetItems = () => dataManager.CurrentConnectors.ToList();

    TableNames = new TextEditSelector();

    ConnectorSelector.SelectChanged += (s, e) =>
    {
        // Guard against a change notification that arrives with no connector selected;
        // dereferencing it would throw a NullReferenceException inside the event handler.
        // The previously loaded table names are left untouched in that case.
        var connector = ConnectorSelector.SelectItem;
        if (connector == null)
        {
            return;
        }

        TableNames.SetSource(connector.RefreshTableNames().Select(d => d.Name));
    };

    TableNames.SelectChanged += (s, e) => { InformPropertyChanged("TableNames"); };
}
/// <summary>
/// Initializes the executor with a crawler selector whose items are the names of all
/// <c>SmartCrawler</c> entries in the process manager's current process collection.
/// </summary>
public SaveFileEX()
{
    var crawlerSelector = new TextEditSelector();

    // Evaluated each time the selector asks for its items, not once at construction.
    crawlerSelector.GetItems = () => processManager.CurrentProcessCollections
        .Where(process => process is SmartCrawler)
        .Select(process => process.Name)
        .ToList();

    CrawlerSelector = crawlerSelector;
}
/// <summary>
/// Initializes a crawler with its default request state and builds the four commands exposed
/// through <c>Commands2</c>. Captions and dialog texts are resolved through
/// <c>GlobalHelper.Get</c> resource keys.
/// </summary>
public SmartCrawler()
{
    Http = new HttpItem();
    CrawlItems = new ObservableCollection<CrawlItem>();
    helper = new HttpHelper();
    URL = "";
    HtmlDoc = new HtmlDocument();
    SelectText = "";
    IsMultiData = ScriptWorkMode.List;
    IsAttribute = true;
    // URL is assigned a second time here, replacing the empty value set above.
    URL = "www.cnblogs.com";

    ShareCookie = new TextEditSelector();
    ShareCookie.GetItems = AppHelper.GetAllCrawlerNames(null);

    Commands2 = CommandBuilder.GetCommands(
        this,
        new[]
        {
            // Enabled only while both a name and an XPath are selected.
            new Command(
                GlobalHelper.Get("key_302"),
                obj => AddNewItem(),
                obj => !string.IsNullOrEmpty(SelectName) && !string.IsNullOrEmpty(SelectXPath),
                "add"),

            // Enabled only once currentXPaths has been set.
            new Command(
                GlobalHelper.Get("search"),
                obj => GetXPathAsync(),
                obj => currentXPaths != null,
                "magnify"),

            // Disabled in NoTransform mode and while busy.
            new Command(
                GlobalHelper.Get("feellucky"),
                obj => FeelLucky(),
                obj => IsMultiData != ScriptWorkMode.NoTransform && isBusy == false,
                "smiley_happy"),

            // Extraction test: previews up to 20 extracted rows, offers to save them,
            // and may suggest a root XPath.
            new Command(
                GlobalHelper.Get("key_624"),
                obj =>
                {
                    var hasCrawlItems = CrawlItems.Count > 0;
                    if (!hasCrawlItems.SafeCheck(GlobalHelper.Get("key_625")))
                    {
                        return;
                    }

                    if (IsMultiData == ScriptWorkMode.List && CrawlItems.Count < 2)
                    {
                        MessageBox.Show(GlobalHelper.Get("key_626"), GlobalHelper.Get("key_99"));
                        return;
                    }

                    // NOTE(review): the call is not awaited here; whether HtmlDoc is populated
                    // before the extraction below depends on VisitUrlAsync - confirm.
                    if (string.IsNullOrEmpty(this.URLHTML))
                    {
                        this.VisitUrlAsync();
                    }

                    var sampleRows = HtmlDoc.DocumentNode
                        .GetDataFromXPath(CrawlItems, IsMultiData, RootXPath, RootFormat)
                        .Take(20)
                        .ToList();

                    var dataViewer = PluginProvider.GetObjectInstance<IDataViewer>(GlobalHelper.Get("key_230"));
                    var viewContent = dataViewer.SetCurrentView(sampleRows);
                    ControlExtended.DockableManager.AddDockAbleContent(
                        FrmState.Custom, viewContent, GlobalHelper.Get("key_627"));

                    var rootPath = XPath.GetMaxCompareXPath(CrawlItems.Select(item => item.XPath));

                    // The save prompt is shown only when at least one row was extracted.
                    if (sampleRows.Count > 0
                        && MessageBox.Show(
                            GlobalHelper.Get("is_save_to_tables"),
                            GlobalHelper.Get("key_99"),
                            MessageBoxButton.YesNo) == MessageBoxResult.Yes)
                    {
                        SysDataManager.AddDataCollection(
                            sampleRows,
                            GlobalHelper.Get("key_624") + "_" + DateTime.Now.ToShortTimeString());
                    }

                    // A root XPath is only suggested in list mode, when none is set yet,
                    // a non-empty common root was found and more than one row was extracted.
                    var rootCanBeSuggested = sampleRows.Count > 1
                                             && string.IsNullOrEmpty(RootXPath)
                                             && rootPath.Length > 0
                                             && IsMultiData == ScriptWorkMode.List;
                    if (!rootCanBeSuggested)
                    {
                        return;
                    }

                    var answer = MessageBox.Show(
                        string.Format(GlobalHelper.Get("key_628"), rootPath),
                        GlobalHelper.Get("key_99"),
                        MessageBoxButton.YesNo);
                    if (answer != MessageBoxResult.Yes)
                    {
                        return;
                    }

                    RootXPath = rootPath;
                    RootFormat = SelectorFormat.XPath;
                    HtmlDoc.CompileCrawItems(CrawlItems);
                    OnPropertyChanged("RootXPath");
                },
                icon: "page_search")
        });
}
/// <summary>
/// Initializes a crawler with its default request state and builds the four commands exposed
/// through <c>Commands2</c>. Captions and dialog texts are hard-coded string literals.
/// </summary>
public SmartCrawler()
{
    Http = new HttpItem();
    CrawlItems = new ObservableCollection<CrawlItem>();
    helper = new HttpHelper();
    URL = "";
    HtmlDoc = new HtmlDocument();
    SelectText = "";
    IsMultiData = ScriptWorkMode.List;
    IsAttribute = true;
    // URL is assigned a second time here, replacing the empty value set above.
    URL = "www.cnblogs.com";

    ShareCookie = new TextEditSelector();
    ShareCookie.GetItems = AppHelper.GetAllCrawlerNames(null);

    Commands2 = CommandBuilder.GetCommands(
        this,
        new[]
        {
            // Enabled only while both a name and an XPath are selected.
            new Command(
                "添加",
                obj => AddNewItem(),
                obj => !string.IsNullOrEmpty(SelectName) && !string.IsNullOrEmpty(SelectXPath),
                "add"),

            // Enabled only once currentXPaths has been set.
            new Command(
                "搜索",
                obj => GetXPathAsync(),
                obj => currentXPaths != null,
                "magnify"),

            // Disabled in NoTransform mode and while busy.
            new Command(
                "手气不错",
                obj => FeelLucky(),
                obj => IsMultiData != ScriptWorkMode.NoTransform && isBusy == false,
                "smiley_happy"),

            // Extraction test: previews up to 20 extracted rows and may suggest a root XPath.
            new Command(
                "提取测试",
                obj =>
                {
                    var hasCrawlItems = CrawlItems.Count > 0;
                    if (!hasCrawlItems.SafeCheck("属性数量不能为空"))
                    {
                        return;
                    }

                    if (IsMultiData == ScriptWorkMode.List && CrawlItems.Count < 2)
                    {
                        MessageBox.Show("列表模式下,属性数量不能少于2个", "提示信息");
                        return;
                    }

                    // NOTE(review): the call is not awaited here; whether HtmlDoc is populated
                    // before the extraction below depends on VisitUrlAsync - confirm.
                    if (string.IsNullOrEmpty(this.URLHTML))
                    {
                        this.VisitUrlAsync();
                    }

                    var sampleRows = HtmlDoc.DocumentNode
                        .GetDataFromXPath(CrawlItems, IsMultiData, RootXPath, RootFormat)
                        .Take(20)
                        .ToList();

                    var dataViewer = PluginProvider.GetObjectInstance<IDataViewer>("可编辑列表");
                    var viewContent = dataViewer.SetCurrentView(sampleRows);
                    ControlExtended.DockableManager.AddDockAbleContent(
                        FrmState.Custom, viewContent, "提取数据测试结果(显示前20条)");

                    var rootPath = XPath.GetMaxCompareXPath(CrawlItems.Select(item => item.XPath));

                    // A root XPath is only suggested in list mode, when none is set yet,
                    // a non-empty common root was found and more than one row was extracted.
                    var rootCanBeSuggested = sampleRows.Count > 1
                                             && string.IsNullOrEmpty(RootXPath)
                                             && rootPath.Length > 0
                                             && IsMultiData == ScriptWorkMode.List;
                    if (!rootCanBeSuggested)
                    {
                        return;
                    }

                    var answer = MessageBox.Show(
                        $"检测到列表的根节点为:{rootPath},是否设置根节点路径? 此操作有建议有经验用户使用,小白用户请点【否】",
                        "提示信息",
                        MessageBoxButton.YesNo);
                    if (answer != MessageBoxResult.Yes)
                    {
                        return;
                    }

                    RootXPath = rootPath;
                    RootFormat = SelectorFormat.XPath;
                    HtmlDoc.CompileCrawItems(CrawlItems);
                    OnPropertyChanged("RootXPath");
                },
                icon: "page_search")
        });
}
/// <summary>
/// Creates the column selector and keeps <c>Column</c> in sync with its selected item.
/// </summary>
protected ToolBase()
{
    ColumnSelector = new TextEditSelector();

    // Copy the selector's current item into Column on every selection change.
    ColumnSelector.SelectChanged += (sender, args) =>
    {
        Column = ColumnSelector.SelectItem;
    };
}
/// <summary>
/// Creates the crawler selector and assigns its item provider from
/// <c>GetAllCrawlerNames()</c>.
/// </summary>
public ResponseTF()
{
    var crawlerSelector = new TextEditSelector();
    CrawlerSelector = crawlerSelector;

    // The provider is assigned after the selector has been stored on the property.
    crawlerSelector.GetItems = this.GetAllCrawlerNames();
}
/// <summary>
/// Creates the crawler selector, assigns its item provider from
/// <c>GetAllCrawlerNames()</c>, and sets <c>IsMultiYield</c> to false.
/// </summary>
public ResponseTF()
{
    var crawlerSelector = new TextEditSelector();
    CrawlerSelector = crawlerSelector;

    // The provider is assigned after the selector has been stored on the property.
    crawlerSelector.GetItems = this.GetAllCrawlerNames();

    IsMultiYield = false;
}