/// <summary>
/// Downloads the documents at the two entered URLs, builds an <c>HtmlTree</c> for each,
/// diffs the trees and lists every reported difference in <c>DiffList</c>.
/// The first tree is annotated with hover hooks and rendered in the embedded browser.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Button_Click(object sender, RoutedEventArgs e)
{
    // Downloading and parsing can fail (bad URL, network error, malformed input).
    // Report the failure to the user instead of letting the exception escape the handler.
    try
    {
        var html1 = NetCommon.DownloadString(URL1Text.Text);
        var html2 = NetCommon.DownloadString(URL2Text.Text);

        var tree1 = new HtmlTree(html1);
        var tree2 = new HtmlTree(html2);
        tree1.BuildTree();
        tree2.BuildTree();

        var diff = tree1.Diff(tree2);

        // Item1 is the "no differences" flag of the diff result.
        if (diff.Item1)
        {
            MessageBox.Show("The structure of the downloaded data is exactly the same and no differences can be found.", "Diff", MessageBoxButton.OK, MessageBoxImage.Information);
            return;
        }

        var rr = new List<CustomCrawlerDiffDataGridItemViewModel>();
        var index = 0;

        // Item2 enumerates pairs of nodes (first document, second document) that differ.
        foreach (var node in diff.Item2)
        {
            string info;

            // Classify the difference; the first matching category wins.
            if (node.Item1.Name != node.Item2.Name)
            {
                info = $"Tag-diff: {node.Item1.Name} <=> {node.Item2.Name}";
            }
            else if (node.Item1.ChildNodes.Count != node.Item2.ChildNodes.Count)
            {
                info = $"Childcount-diff: {node.Item1.XPath}";
            }
            else if (node.Item1.Name == "#text")
            {
                info = $"Text-diff: {node.Item1.InnerText.Trim()} <=> {node.Item2.InnerText.Trim()}";
            }
            else if (!HtmlTree.IsEqual(node.Item1.Attributes, node.Item2.Attributes))
            {
                info = $"Attributes-diff: {node.Item1.XPath}";
            }
            else
            {
                info = "";
            }

            rr.Add(new CustomCrawlerDiffDataGridItemViewModel
            {
                Index = (++index).ToString(),
                Info = info,
                Location = node.Item1,
            });
        }

        // Tag the elements of the first tree so the page can report hovered elements,
        // then render its root node with the first URL as the base address.
        marking(tree1);
        browser.LoadHtml(tree1[0][0].OuterHtml, URL1Text.Text);

        // NOTE(review): this blocks the UI thread for 500 ms; presumably it gives the
        // browser time to start loading before the list is populated - confirm.
        Thread.Sleep(500);

        DiffList.DataContext = new CustomCrawlerDiffDataGridViewModel(rr);
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message, "Diff", MessageBoxButton.OK, MessageBoxImage.Error);
    }
}
/// <summary>
/// Walks every level of <paramref name="tree"/> and, on each node that is neither a
/// comment nor a text node, sets a <c>ccw_tag</c> attribute of the form
/// <c>ccw_{level}_{index}</c> and points both <c>onmouseenter</c> and
/// <c>onmouseleave</c> at <c>ccw.hoverelem</c> with that tag.
/// </summary>
/// <param name="tree">Tree whose nodes are annotated in place.</param>
private void marking(HtmlTree tree)
{
    for (int level = 0; level <= tree.Height; level++)
    {
        for (int pos = 0; pos < tree[level].Count; pos++)
        {
            var node = tree[level][pos];

            // Comment and text nodes cannot carry attributes of interest; skip them.
            if (node.Name == "#comment" || node.Name == "#text")
            {
                continue;
            }

            var tag = $"ccw_{level}_{pos}";
            var hoverCall = $"ccw.hoverelem('{tag}')";

            node.SetAttributeValue("ccw_tag", tag);
            // Both events invoke the same callback with the same tag.
            node.SetAttributeValue("onmouseenter", hoverCall);
            node.SetAttributeValue("onmouseleave", hoverCall);
        }
    }
}
/// <summary>
/// Loads the document named in <c>URLText</c> (a local file if one exists at that path,
/// otherwise a download, or a Selenium-driven fetch when <c>driverCheck</c> is not
/// explicitly unchecked), builds its <c>HtmlTree</c> and refreshes <c>HTMLList</c>.
/// If the URL equals the one already loaded, only the list is refreshed.
/// </summary>
/// <remarks>
/// The window state (<c>original_url</c>, <c>root_url</c>, <c>tree</c>) is updated only
/// after the new document has been fetched and parsed. A failed load therefore leaves
/// the previous state intact, and a retry with the same URL fetches again instead of
/// taking the "already loaded" shortcut.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void URLButton_Click(object sender, RoutedEventArgs e)
{
    var url = URLText.Text;

    // Same URL as the currently loaded document: just rebuild the list.
    if (original_url == url)
    {
        HTMLList.DataContext = new CustomCrawlerDataGridViewModel(GetLoadResults());
        return;
    }

    try
    {
        HtmlTree loaded;

        if (driverCheck.IsChecked == false)
        {
            string html;
            if (!File.Exists(url))
            {
                var client = NetCommon.GetDefaultClient();
                if (EucKR.IsChecked == true)
                {
                    // Code page 51949 is EUC-KR.
                    client.Encoding = Encoding.GetEncoding(51949);
                }
                html = client.DownloadString(url);
            }
            else
            {
                html = File.ReadAllText(url);
            }
            loaded = new HtmlTree(html);
        }
        else
        {
            var driver = new SeleniumWrapper();
            try
            {
                driver.Navigate(url);
                loaded = new HtmlTree(driver.GetHtml());
            }
            finally
            {
                // Always release the driver, even when navigation or retrieval fails.
                driver.Close();
            }
        }

        loaded.BuildTree();

        // Commit the new state only now that loading has succeeded.
        original_url = url;

        // Root is "scheme://host": the first three '/'-separated segments.
        // Inputs with fewer segments (e.g. a bare file name) leave root_url unchanged.
        var segments = url.Split(new char[] { '/' }, 4);
        if (segments.Length >= 3)
        {
            root_url = string.Join("/", segments, 0, 3);
        }

        tree = loaded;
        HTMLList.DataContext = new CustomCrawlerDataGridViewModel(GetLoadResults());
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message, Title, MessageBoxButton.OK, MessageBoxImage.Error);
    }
}
/// <summary>
/// Creates the cluster window: sets up the embedded Chromium browser and its
/// <c>ccw</c> JavaScript callback object, attaches data contexts and sorters to the
/// three grids, and annotates every element of <paramref name="tree"/> with a
/// <c>ccw_tag</c> plus hover handlers.
/// </summary>
/// <param name="url">Stored in the <c>url</c> field.</param>
/// <param name="tree">Stored in the <c>tree</c> field; its nodes are modified in place.</param>
public CustomCrawlerCluster(string url, HtmlTree tree)
{
    InitializeComponent();

    // Embedded browser and the object exposed to page scripts under the name "ccw".
    browser = new ChromiumWebBrowser(string.Empty);
    browserContainer.Content = browser;
    browser.IsBrowserInitializedChanged += Browser_IsBrowserInitializedChanged;
    CefSharpSettings.LegacyJavascriptBindingEnabled = true;
    cbccw = new CallbackCCW(this);
    browser.JavascriptObjectRepository.Register("ccw", cbccw, isAsync: true);

    this.url = url;
    this.tree = tree;

    // Each grid gets an empty view model and its own sorter.
    ResultList.DataContext = new CustomCrawlerClusterDataGridViewModel();
    var resultSorter = new DataGridSorter<CustomCrawlerClusterDataGridItemViewModel>(ResultList);
    ResultList.Sorting += new DataGridSortingEventHandler(resultSorter.SortHandler);

    CaptureList.DataContext = new CustomCrawlerClusterCaptureDataGridViewModel();
    var captureSorter = new DataGridSorter<CustomCrawlerClusterCaptureDataGridItemViewModel>(CaptureList);
    CaptureList.Sorting += new DataGridSortingEventHandler(captureSorter.SortHandler);

    PatternList.DataContext = new CustomCrawlerClusterPatternDataGridViewModel();
    var patternSorter = new DataGridSorter<CustomCrawlerClusterPatternDataGridItemViewModel>(PatternList);
    PatternList.Sorting += new DataGridSortingEventHandler(patternSorter.SortHandler);

    for (int level = 0; level <= tree.Height; level++)
    {
        for (int pos = 0; pos < tree[level].Count; pos++)
        {
            var node = tree[level][pos];

            // Only element nodes are tagged; comments and text are left alone.
            if (node.Name == "#comment" || node.Name == "#text")
            {
                continue;
            }

            var tag = $"ccw_{level}_{pos}";
            node.SetAttributeValue("ccw_tag", tag);

            // Keep any handlers the page already defined under origin_* before replacing them.
            if (node.Attributes.Contains("onmouseenter"))
            {
                var originalEnter = node.GetAttributeValue("onmouseenter", "");
                node.SetAttributeValue("origin_onmouseenter", originalEnter);
            }
            if (node.Attributes.Contains("onmouseleave"))
            {
                var originalLeave = node.GetAttributeValue("onmouseleave", "");
                node.SetAttributeValue("origin_onmouseleave", originalLeave);
            }

            // Both events call the same callback with the node's tag.
            var hoverCall = $"ccw.hoverelem('{tag}')";
            node.SetAttributeValue("onmouseenter", hoverCall);
            node.SetAttributeValue("onmouseleave", hoverCall);
        }
    }

    KeyDown += CustomCrawlerCluster_KeyDown;
    Loaded += CustomCrawlerCluster_Loaded;
}