/// <summary>
/// Adds the posted crawl item to the context, saves the change, and answers with a
/// "created" result that points at the "GetCrawlItem" action.
/// </summary>
/// <param name="crawlItem">Crawl item bound from the submitted form data.</param>
/// <returns>The value produced by <c>CreatedAtAction</c>, carrying the saved item.</returns>
public async Task <ActionResult <CrawlItem> > PostCrawlItem([FromForm] CrawlItem crawlItem)
        {
            _context.CrawlItems.Add(crawlItem);
            await _context.SaveChangesAsync();

            // The id is read only after the save has completed.
            var routeValues = new { id = crawlItem.Id };
            return CreatedAtAction("GetCrawlItem", routeValues, crawlItem);
        }
Esempio n. 2
0
        /// <summary>
        /// Builds a crawl item from the current selection (SelectXPath, SelectName, SelectText)
        /// and appends it to CrawlItems, then clears SelectXPath.
        /// </summary>
        /// <remarks>
        /// When RootXPath is set, the selected node must pass the IsAncestor check against the
        /// parent of the RootXPath node; the stored XPath is then the node's XPath with the
        /// parent's XPath taken off.
        /// </remarks>
        /// <param name="isAlert">
        /// When true, problems are reported through message boxes and a duplicate name aborts
        /// the add. When false, failures are silent and a duplicate name is replaced by a
        /// generated one before the item is added.
        /// </param>
        private void AddNewItem(bool isAlert = true)
        {
            var path = SelectXPath;

            if (!string.IsNullOrEmpty(RootXPath))
            {
                // SelectSingleNode yields null when nothing matches, so guard before dereferencing.
                var root = HtmlDoc.DocumentNode.SelectSingleNode(RootXPath)?.ParentNode;
                if (root == null)
                {
                    if (isAlert)
                    {
                        MessageBox.Show("使用当前父节点XPath,在文档中找不到任何父节点");
                    }
                    return;
                }

                var node = HtmlDoc.DocumentNode.SelectSingleNode(path);
                if (node == null)
                {
                    if (isAlert)
                    {
                        MessageBox.Show("使用当前子节点XPath,在文档中找不到任何子节点");
                    }
                    return;
                }

                if (!node.IsAncestor(root))
                {
                    if (isAlert)
                    {
                        MessageBox.Show("当前XPath所在节点不是父节点的后代,请检查对应的XPath");
                    }
                    return;
                }
                path = new XPath(node.XPath).TakeOff(root.XPath).ToString();
            }

            var name = SelectName;
            if (CrawlItems.Any(d => d.Name == name))
            {
                // Offer a generated replacement name, but report the name that actually clashed.
                SelectName = "属性" + CrawlItems.Count;
                if (isAlert)
                {
                    MessageBox.Show($"已存在名称为{name}的属性,不能重复添加");
                    return;
                }
                // Silent mode: add the item under the generated name instead of the duplicate.
                name = SelectName;
            }

            CrawlItems.Add(new CrawlItem {
                XPath = path, Name = name, SampleData1 = SelectText
            });
            SelectXPath = "";
        }
        /// <summary>
        /// Marks the posted crawl item as modified in the context and saves it.
        /// </summary>
        /// <param name="id">Identifier bound from the form; must equal <c>crawlItem.Id</c>.</param>
        /// <param name="crawlItem">Crawl item bound from the submitted form data.</param>
        /// <returns>
        /// <c>BadRequest</c> when the ids differ, <c>NotFound</c> when saving raises a concurrency
        /// exception and <c>CrawlItemExists(id)</c> is false, otherwise <c>NoContent</c>.
        /// </returns>
        public async Task <IActionResult> PutCrawlItem([FromForm] int id, [FromForm] CrawlItem crawlItem)
        {
            if (id != crawlItem.Id)
            {
                return BadRequest();
            }

            _context.Entry(crawlItem).State = EntityState.Modified;

            try
            {
                await _context.SaveChangesAsync();
            }
            catch (DbUpdateConcurrencyException)
            {
                // The item still exists, so the conflict is real: let it propagate unchanged.
                if (CrawlItemExists(id))
                {
                    throw;
                }

                return NotFound();
            }

            return NoContent();
        }
Esempio n. 4
0
        /// <summary>
        /// Restores this object's settings from <paramref name="dicts"/> after the base class
        /// has done its own deserialization.
        /// </summary>
        /// <remarks>
        /// Each setting is assigned from <c>dicts.Set(key, currentValue)</c>; presumably that
        /// helper returns the stored value or the passed-in current value — confirm against its
        /// definition. A "HttpSet" entry is forwarded to <c>Http</c> only when it is a dictionary.
        /// When <paramref name="dicts"/> is a FreeDocument with children, one CrawlItem is
        /// deserialized from each child and appended to CrawlItems.
        /// </remarks>
        /// <param name="dicts">Key/value source to read from.</param>
        /// <param name="scenario">Passed through to the base implementation.</param>
        public override void DictDeserialize(IDictionary <string, object> dicts, Scenario scenario = Scenario.Database)
        {
            base.DictDeserialize(dicts, scenario);
            URL                    = dicts.Set("URL", URL);
            RootXPath              = dicts.Set("RootXPath", RootXPath);
            Remark                 = dicts.Set("Remark", Remark);
            RootFormat             = dicts.Set("RootFormat", RootFormat);
            ShareCookie.SelectItem = dicts.Set("ShareCookie", ShareCookie.SelectItem);
            IsMultiData            = dicts.Set("IsMultiData", IsMultiData);
            IsSuperMode            = dicts.Set("IsSuperMode", IsSuperMode);

            if (dicts.ContainsKey("HttpSet"))
            {
                // The stored value may not be a dictionary; never hand a null to the HTTP settings.
                var httpSettings = dicts["HttpSet"] as IDictionary <string, object>;
                if (httpSettings != null)
                {
                    Http.UnsafeDictDeserialize(httpSettings);
                }
            }

            // The former "Generator" branch only read the entry into unused locals; it was dropped.

            var doc = dicts as FreeDocument;
            if (doc?.Children == null)
            {
                return;
            }

            foreach (var child in doc.Children)
            {
                var item = new CrawlItem();
                item.DictDeserialize(child);
                CrawlItems.Add(item);
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Creates a crawl item from SelectXPath, SelectName and SelectText, adds it to
        /// CrawlItems, and then clears SelectXPath and SelectName.
        /// </summary>
        /// <remarks>
        /// When RootXPath is set, the root node and the selected node are both resolved in
        /// HtmlDoc, and the stored path is produced by XPath.TakeOff against the parent of the
        /// root node. If an item with the same name already exists, the user is asked to confirm
        /// before the item is added; this prompt does not depend on <paramref name="isAlert"/>.
        /// </remarks>
        /// <param name="isAlert">
        /// When true, the user is asked whether to continue if the selected node fails the
        /// IsAncestor check against the root's parent.
        /// </param>
        private void AddNewItem(bool isAlert = true)
        {
            var path = SelectXPath;

            if (!string.IsNullOrEmpty(RootXPath))
            {
                // A malformed XPath makes SelectSingleNode throw; log it and fall into the null check.
                HtmlNode root = null;
                try
                {
                    root = HtmlDoc.DocumentNode.SelectSingleNode(RootXPath);
                }
                catch (Exception)
                {
                    XLogSys.Print.Error($"{RootXPath}  不能被识别为正确的XPath表达式,请检查");
                }
                if (!(root != null).SafeCheck("使用当前父节点XPath,在文档中找不到任何父节点"))
                {
                    return;
                }

                // Reuse the node found above instead of evaluating RootXPath a second time, and
                // stop here when it has no parent rather than failing later on root.XPath.
                root = root?.ParentNode;
                if (!(root != null).SafeCheck("使用当前父节点XPath,在文档中找不到任何父节点"))
                {
                    return;
                }

                HtmlNode node = null;
                if (
                    !ControlExtended.SafeInvoke(() => HtmlDoc.DocumentNode.SelectSingleNode(path), ref node,
                                                LogType.Info, "检查子节点XPath正确性", true))

                {
                    return;
                }
                if (!(node != null).SafeCheck("使用当前子节点XPath,在文档中找不到任何子节点"))
                {
                    return;
                }

                if (!node.IsAncestor(root) && isAlert)
                {
                    if (
                        MessageBox.Show("当前XPath所在节点不是父节点的后代,请检查对应的XPath,是否依然要添加?", "提示信息", MessageBoxButton.YesNo) ==
                        MessageBoxResult.No)
                    {
                        return;
                    }
                }
                path = XPath.TakeOff(node.XPath, root.XPath);
            }
            if (CrawlItems.FirstOrDefault(d => d.Name == SelectName) == null ||
                MessageBox.Show("已经存在同名的属性,是否依然添加?", "提示信息", MessageBoxButton.OKCancel) == MessageBoxResult.OK)
            {
                var item = new CrawlItem {
                    XPath = path, Name = SelectName, SampleData1 = SelectText
                };

                CrawlItems.Add(item);

                SelectXPath = "";
                SelectName  = "";
                XLogSys.Print.Info("成功添加属性");
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Restores this object's settings from <paramref name="dicts"/> after the base class
        /// has done its own deserialization.
        /// </summary>
        /// <remarks>
        /// Each setting is assigned from <c>dicts.Set(key, currentValue)</c>; presumably that
        /// helper returns the stored value or the passed-in current value — confirm against its
        /// definition. A "HttpSet" entry is forwarded to <c>Http</c> only when it is a dictionary.
        /// When <paramref name="dicts"/> is a FreeDocument with children, one CrawlItem is
        /// deserialized from each child and appended to CrawlItems.
        /// </remarks>
        /// <param name="dicts">Key/value source to read from.</param>
        /// <param name="scenario">Passed through to the base implementation.</param>
        public override void DictDeserialize(IDictionary <string, object> dicts, Scenario scenario = Scenario.Database)
        {
            base.DictDeserialize(dicts, scenario);
            URL         = dicts.Set("URL", URL);
            RootXPath   = dicts.Set("RootXPath", RootXPath);
            IsMultiData = dicts.Set("IsMultiData", IsMultiData);
            IsJson2xml  = dicts.Set("IsJson2xml", IsJson2xml);
            Crawler     = dicts.Set("Crawler", Crawler);

            if (dicts.ContainsKey("HttpSet"))
            {
                // The stored value may not be a dictionary; never hand a null to the HTTP settings.
                var httpSettings = dicts["HttpSet"] as IDictionary <string, object>;
                if (httpSettings != null)
                {
                    Http.UnsafeDictDeserialize(httpSettings);
                }
            }

            // The former "Generator" branch only read the entry into unused locals; it was dropped.

            var doc = dicts as FreeDocument;
            if (doc?.Children == null)
            {
                return;
            }

            foreach (var child in doc.Children)
            {
                var item = new CrawlItem();
                item.DictDeserialize(child);
                CrawlItems.Add(item);
            }
        }
        /// <summary>
        /// For every crawl result, loads the page at its Url and appends one extracted CrawlItem
        /// per configured entry in <c>this.CrawlItems</c> to that result.
        /// </summary>
        /// <remarks>
        /// The extracted value is the matched element's InnerHtml when the entry has no Attr,
        /// otherwise the named attribute; it is null when the selector matches nothing.
        /// The page is obtained by reading <c>.Result</c> on what <c>PageCrawler.GetPage</c> returns.
        /// </remarks>
        /// <param name="results">Results to enrich; the same list instance is returned.</param>
        /// <returns>The <paramref name="results"/> list that was passed in.</returns>
        public override List <CrawlResult> Process(List <CrawlResult> results)
        {
            foreach (var crawlResult in results)
            {
                var page = PageCrawler.GetPage(crawlResult.Url).Result;

                foreach (var template in this.CrawlItems)
                {
                    var extracted = new CrawlItem {
                        Name = template.Name, Selector = template.Selector, Attr = template.Attr
                    };

                    // One lookup serves both the InnerHtml and the attribute case.
                    var element = page.QuerySelector(extracted.Selector);
                    extracted.Value = string.IsNullOrWhiteSpace(extracted.Attr)
                        ? element?.InnerHtml
                        : element?.GetAttribute(extracted.Attr);

                    crawlResult.CrawlItems.Add(extracted);
                }
            }

            return results;
        }
Esempio n. 8
0
        /// <summary>
        /// Loads the listing page at http://xiaohua.zol.com.cn/new/1.html, follows every article
        /// link not yet recorded in <c>caches</c>, and builds a CrawlItem from each article page
        /// that has both a title and a content node.
        /// </summary>
        /// <remarks>
        /// <c>caches</c> is cleared once it holds more than 100 entries. The thread sleeps five
        /// seconds after each listing entry. Titles shorter than eight characters are also
        /// stored as the item's only tag.
        /// </remarks>
        /// <returns>
        /// The collected items, or null when the listing page contains no matching entries.
        /// </returns>
        public List <CrawlItem> Execute()
        {
            if (caches.Count > 100)
            {
                caches.Clear();
            }

            Encoding     encoding = Encoding.GetEncoding("GBK");
            HtmlDocument document = HtmlAdapter.LoadDocument("http://xiaohua.zol.com.cn/new/1.html", encoding);
            var          nodes    = document.DocumentNode.SelectNodes(".//li[@class='article-summary']");

            if (nodes == null)
            {
                return(null);
            }

            List <CrawlItem> list = new List <CrawlItem>();

            foreach (var item in nodes)
            {
                // An entry without the expected link yields null here; treat it as "no href"
                // so it is skipped below instead of raising a NullReferenceException.
                var    aNode = item.SelectSingleNode("span[2]/a");
                string href  = aNode == null ? string.Empty : aNode.GetAttributeValue("href", string.Empty);

                if (!string.IsNullOrWhiteSpace(href) && !caches.Contains(href))
                {
                    caches.Add(href);

                    HtmlDocument document1   = HtmlAdapter.LoadDocument("http://xiaohua.zol.com.cn" + href, encoding);
                    var          titleNode   = document1.DocumentNode.SelectSingleNode(".//h1[@class='article-title']");
                    var          contentNode = document1.DocumentNode.SelectSingleNode(".//div[@class='article-text']");
                    if (titleNode != null && contentNode != null)
                    {
                        var model = new CrawlItem
                        {
                            Title     = titleNode.InnerText,
                            Contents  = contentNode.InnerHtml,
                            CatalogId = 5,
                            AccountId = 1,
                        };
                        if (model.Title.Length < 8)
                        {
                            model.Tags = new List <string>()
                            {
                                model.Title
                            };
                        }
                        list.Add(model);
                    }
                }

                Thread.Sleep(5000); // pause for 5 seconds
            }
            return(list);
        }
Esempio n. 9
0
        /// <summary>
        /// Builds a crawl item from the current selection (SelectXPath, SelectName, SelectText)
        /// and appends it to CrawlItems, then clears SelectXPath.
        /// </summary>
        /// <remarks>
        /// When RootXPath is set, the root node and the selected node are both resolved in
        /// HtmlDoc, and the stored XPath is the node's XPath with the XPath of the root's parent
        /// taken off. A node that fails the IsAncestor check is only accepted after the user
        /// confirms (when <paramref name="isAlert"/> is true).
        /// </remarks>
        /// <param name="isAlert">
        /// When true, the user is prompted for questionable selections and a duplicate name
        /// aborts the add. When false, no prompts are shown and a duplicate name is replaced by
        /// a generated one before the item is added.
        /// </param>
        private void AddNewItem(bool isAlert = true)
        {
            var path = SelectXPath;

            if (!string.IsNullOrEmpty(RootXPath))
            {
                // TODO: a malformed XPath makes SelectSingleNode throw; that is not caught here yet.
                var root = HtmlDoc.DocumentNode.SelectSingleNode(RootXPath);
                if (!(root != null).SafeCheck("使用当前父节点XPath,在文档中找不到任何父节点"))
                {
                    return;
                }

                // Reuse the node found above instead of evaluating RootXPath a second time, and
                // stop here when it has no parent rather than failing later on root.XPath.
                root = root?.ParentNode;
                if (!(root != null).SafeCheck("使用当前父节点XPath,在文档中找不到任何父节点"))
                {
                    return;
                }

                var node = HtmlDoc.DocumentNode.SelectSingleNode(path);
                if (!(node != null).SafeCheck("使用当前子节点XPath,在文档中找不到任何子节点"))
                {
                    return;
                }

                if (!node.IsAncestor(root) && isAlert &&
                    MessageBox.Show("当前XPath所在节点不是父节点的后代,请检查对应的XPath,是否依然要添加?", "提示信息", MessageBoxButton.YesNo) !=
                    MessageBoxResult.Yes)
                {
                    return;
                }

                // The relative path must be computed for every accepted node, not only for the
                // "not a descendant but confirmed" case.
                path = new XPath(node.XPath).TakeOff(root.XPath).ToString();
            }

            var name = SelectName;
            if (CrawlItems.Any(d => d.Name == name))
            {
                // Offer a generated replacement name, but report the name that actually clashed.
                SelectName = "属性" + CrawlItems.Count;
                if (isAlert)
                {
                    MessageBox.Show($"已存在名称为{name}的属性,不能重复添加");
                    return;
                }
                // Silent mode: add the item under the generated name instead of the duplicate.
                name = SelectName;
            }

            CrawlItems.Add(new CrawlItem {
                XPath = path, Name = name, SampleData1 = SelectText
            });
            SelectXPath = "";
        }
Esempio n. 10
0
        /// <summary>
        /// Builds a crawl item from the current selection (SelectXPath, SelectName, SelectText)
        /// and appends it to CrawlItems, then clears SelectXPath.
        /// </summary>
        /// <param name="isAlert">
        /// When true, a name that already exists is reported in a message box and nothing is
        /// added. When false, the item is added under a generated name instead.
        /// </param>
        private void AddNewItem(bool isAlert = true)
        {
            var name = SelectName;

            if (CrawlItems.Any(d => d.Name == name))
            {
                // Offer a generated replacement name, but report the name that actually clashed.
                SelectName = "属性" + CrawlItems.Count;
                if (isAlert)
                {
                    MessageBox.Show($"已存在名称为{name}的属性,不能重复添加");
                    return;
                }
                // Silent mode: add the item under the generated name instead of the duplicate.
                name = SelectName;
            }

            CrawlItems.Add(new CrawlItem {
                XPath = SelectXPath, Name = name, SampleData1 = SelectText
            });
            SelectXPath = "";
        }
Esempio n. 11
0
        /// <summary>
        /// Creates a crawl item from SelectXPath, SelectName and SelectText (tagged with
        /// SearchFormat), adds it to CrawlItems, and then clears SelectXPath and SelectName.
        /// </summary>
        /// <remarks>
        /// When RootXPath is set, the root node and the selected node are both resolved in
        /// HtmlDoc. For the XPath search format the stored path is produced by XPath.TakeOffPlus
        /// against the parent of the root node, with an attribute step appended when
        /// XPathAnalyzer.GetAttribute reports an attribute name. If an item with the same name
        /// already exists, the user is asked to confirm; this prompt does not depend on
        /// <paramref name="isAlert"/>.
        /// </remarks>
        /// <param name="isAlert">
        /// When true, the user is asked whether to continue if the selected node fails the
        /// IsAncestor check against the root's parent.
        /// </param>
        private void AddNewItem(bool isAlert = true)
        {
            var path     = SelectXPath;
            var rootPath = RootXPath;

            if (!string.IsNullOrEmpty(rootPath))
            {
                // A malformed selector makes the lookup throw; log it and fall into the null check.
                HtmlNode root = null;
                try
                {
                    root = HtmlDoc.DocumentNode.SelectSingleNodePlus(rootPath, RootFormat);
                }
                catch (Exception)
                {
                    XLogSys.Print.Error(string.Format(GlobalHelper.Get("key_662"), RootXPath, RootFormat));
                }
                if (!(root != null).SafeCheck(string.Format(GlobalHelper.Get("key_663"), RootFormat, RootXPath)))
                {
                    return;
                }

                // Reuse the node found above instead of running the selector a second time, and
                // stop here when it has no parent rather than failing later on root.XPath.
                root = root?.ParentNode;
                if (!(root != null).SafeCheck(string.Format(GlobalHelper.Get("key_663"), RootFormat, RootXPath)))
                {
                    return;
                }

                HtmlNode node = null;
                if (
                    !ControlExtended.SafeInvoke(() => HtmlDoc.DocumentNode.SelectSingleNodePlus(path, SearchFormat),
                                                ref node,
                                                LogType.Info, GlobalHelper.Get("key_664"), true))

                {
                    return;
                }
                if (!(node != null).SafeCheck(GlobalHelper.Get("key_665")))
                {
                    return;
                }

                if (!node.IsAncestor(root) && isAlert)
                {
                    if (
                        MessageBox.Show(GlobalHelper.Get("key_666"), GlobalHelper.Get("key_99"), MessageBoxButton.YesNo) ==
                        MessageBoxResult.No)
                    {
                        return;
                    }
                }

                // Both values are assigned by the call; attrValue is not needed afterwards.
                string attr;
                string attrValue;
                XPathAnalyzer.GetAttribute(path, out attr, out attrValue);
                if (SearchFormat == SelectorFormat.XPath)
                {
                    path = XPath.TakeOffPlus(node.XPath, root.XPath);
                    if (attr != "")
                    {
                        path += "/@" + attr + "[1]";
                    }
                }
            }
            if (CrawlItems.FirstOrDefault(d => d.Name == SelectName) == null ||
                MessageBox.Show(GlobalHelper.Get("add_column_sure"), GlobalHelper.Get("key_99"), MessageBoxButton.OKCancel) == MessageBoxResult.OK)
            {
                var item = new CrawlItem {
                    XPath = path, Name = SelectName, SampleData1 = SelectText
                };
                item.Format = SearchFormat;
                CrawlItems.Add(item);

                SelectXPath = "";
                SelectName  = "";

                XLogSys.Print.Info(GlobalHelper.Get("key_668"));
            }
        }
Esempio n. 12
0
        /// <summary>
        /// Creates a crawl item from SelectXPath, SelectName and SelectText (tagged with
        /// SearchFormat), adds it to CrawlItems, and then clears SelectXPath and SelectName.
        /// </summary>
        /// <remarks>
        /// When RootXPath is set, the root node and the selected node are both resolved in
        /// HtmlDoc. For the XPath search format the stored path is produced by XPath.TakeOffPlus
        /// against the parent of the root node, with an attribute step appended when
        /// XPathAnalyzer.GetAttribute reports an attribute name. If an item with the same name
        /// already exists, the user is asked to confirm; this prompt does not depend on
        /// <paramref name="isAlert"/>.
        /// </remarks>
        /// <param name="isAlert">
        /// When true, the user is asked whether to continue if the selected node fails the
        /// IsAncestor check against the root's parent.
        /// </param>
        private void AddNewItem(bool isAlert = true)
        {
            var path     = SelectXPath;
            var rootPath = RootXPath;

            if (!string.IsNullOrEmpty(rootPath))
            {
                // A malformed selector makes the lookup throw; log it and fall into the null check.
                HtmlNode root = null;
                try
                {
                    root = HtmlDoc.DocumentNode.SelectSingleNodePlus(rootPath, RootFormat);
                }
                catch (Exception)
                {
                    XLogSys.Print.Error($"{RootXPath}  不能被识别为正确的{RootFormat}表达式,请检查");
                }
                if (!(root != null).SafeCheck($"使用当前父节点{RootFormat} {RootXPath},在文档中找不到任何父节点"))
                {
                    return;
                }

                // Reuse the node found above instead of running the selector a second time, and
                // stop here when it has no parent rather than failing later on root.XPath.
                root = root?.ParentNode;
                if (!(root != null).SafeCheck($"使用当前父节点{RootFormat} {RootXPath},在文档中找不到任何父节点"))
                {
                    return;
                }

                HtmlNode node = null;
                if (
                    !ControlExtended.SafeInvoke(() => HtmlDoc.DocumentNode.SelectSingleNodePlus(path, SearchFormat),
                                                ref node,
                                                LogType.Info, "检查子节点XPath正确性", true))

                {
                    return;
                }
                if (!(node != null).SafeCheck("使用当前子节点XPath,在文档中找不到任何子节点"))
                {
                    return;
                }

                if (!node.IsAncestor(root) && isAlert)
                {
                    if (
                        MessageBox.Show("当前XPath所在节点不是父节点的后代,请检查对应的XPath,是否依然要添加?", "提示信息", MessageBoxButton.YesNo) ==
                        MessageBoxResult.No)
                    {
                        return;
                    }
                }

                // Both values are assigned by the call; attrValue is not needed afterwards.
                string attr;
                string attrValue;
                XPathAnalyzer.GetAttribute(path, out attr, out attrValue);
                if (SearchFormat == SelectorFormat.XPath)
                {
                    path = XPath.TakeOffPlus(node.XPath, root.XPath);
                    if (attr != "")
                    {
                        path += "/@" + attr + "[1]";
                    }
                }
            }
            if (CrawlItems.FirstOrDefault(d => d.Name == SelectName) == null ||
                MessageBox.Show("已经存在同名的属性,是否依然添加?", "提示信息", MessageBoxButton.OKCancel) == MessageBoxResult.OK)
            {
                var item = new CrawlItem {
                    XPath = path, Name = SelectName, SampleData1 = SelectText
                };
                item.Format = SearchFormat;
                CrawlItems.Add(item);

                SelectXPath = "";
                SelectName  = "";

                XLogSys.Print.Info("成功添加属性");
            }
        }