示例#1
0
        /// <summary>
        /// Builds an article from the text file at <paramref name="inputPath"/> and raises
        /// <c>Created</c> with it.
        /// </summary>
        /// <remarks>
        /// The first line of the file is split on spaces (empty entries removed) and each piece
        /// becomes a <c>Tag</c>; everything after the first line becomes the article data.
        /// When the file is empty there is no first line, so the tag list passed to the
        /// article is <c>null</c> and the data is an empty string.
        /// </remarks>
        /// <param name="inputPath">Path of the file to read.</param>
        public void Create(string inputPath)
        {
            GenericArticle<string> newArticle;

            // The using statement closes the reader even if reading or article construction
            // throws; the previous manual Dispose() call was skipped on any exception.
            using (var reader = new StreamReader(inputPath))
            {
                var tagsFromFile = reader.ReadLine();

                var separators = new[] { ' ' };
                var tags       = tagsFromFile?.Split(separators, StringSplitOptions.RemoveEmptyEntries);

                var data     = reader.ReadToEnd();
                var realTags = tags?.Select(x => new Tag(x)).ToList();

                newArticle = new GenericArticle<string>(realTags, data);
            }

            // Raised only after the file has been closed, as before.
            Created?.Invoke(this, newArticle);
        }
示例#2
0
文件: Crawler.cs 项目: workcard/CafeT
        /// <summary>
        /// Downloads the page at <paramref name="url"/>, selects the nodes matching
        /// <paramref name="css"/> and converts each of them to a <c>GenericArticle</c>.
        /// </summary>
        /// <param name="url">Address of the page; it is only downloaded when <c>IsUrl()</c> accepts it.</param>
        /// <param name="css">Class name passed to <c>GetNodesByClass</c> to select the article nodes.</param>
        /// <returns>
        /// The articles that were converted successfully. Nodes whose conversion throws or
        /// yields <c>null</c> are skipped, so the list may be empty.
        /// </returns>
        private List <GenericArticle> GetArticles(string url, string css)
        {
            List <GenericArticle> _articles = new List <GenericArticle>();

            // NOTE(review): _url is null when no stored record matches; ToGeneric() below is
            // then called on null - confirm that it tolerates a null receiver.
            var _url = db.Urls.FirstOrDefault(c => c.UrlLink == url);

            string       _headerUrl = string.Empty;
            HtmlDocument _doc       = new HtmlDocument();

            if (url.IsUrl())
            {
                // NOTE(review): .Result blocks the calling thread (GetHtml() presumably returns
                // a Task<string>); this can deadlock under a synchronization context. Kept
                // because the method signature is synchronous.
                _doc.LoadHtml(url.GetHtml().Result);
            }

            var _nodes = _doc.GetNodesByClass(css);

            if (_nodes != null && _nodes.Any())
            {
                foreach (var item in _nodes)
                {
                    // Converted once per node, as before, so every article gets its own instance.
                    GenericUrl _genericUrl = _url.ToGeneric();
                    try
                    {
                        var _object = item.HtmlNodeToGArticle(_genericUrl, _headerUrl);
                        if (_object != null)
                        {
                            _articles.Add(_object);
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort: a node that fails to convert is reported and skipped.
                        Console.WriteLine(ex.Message);
                    }
                }
            }

            return _articles;
        }