Exemplo n.º 1
0
        /// <summary>
        /// Downloads the page at <c>_config.MainLink</c>, follows every link selected by
        /// <c>_config.NewsListSelector</c> that <c>IsNewLink</c> accepts, and saves each article
        /// whose text contains at least one of the <c>_config.SearchFor</c> terms.
        /// The term match is case-insensitive (both sides are lower-cased with the invariant culture).
        /// </summary>
        public void Grab()
        {
            var data      = GetData(_config.MainLink);
            var outputDir = Path.Combine(_mainConfig.OutputDir, _config.OutName + ".out");

            // CreateDirectory does nothing when the directory already exists,
            // so a preceding Directory.Exists check is unnecessary.
            Directory.CreateDirectory(outputDir);

            var cq = CQ.Create(data);

            // Skip entries with a missing or blank href: there is nothing to download for them.
            // The query is deliberately left lazy so IsNewLink runs per link as the loop advances.
            // NOTE(review): presumably IsNewLink inspects outputDir, which SaveDataToFile writes to —
            // confirm before materializing this sequence.
            var newsLink = cq.Find(_config.NewsListSelector)
                             .Select(x => x.GetAttribute("href"))
                             .Where(x => !string.IsNullOrWhiteSpace(x))
                             .Where(x => IsNewLink(x, outputDir));

            // The search terms do not depend on the article, so normalize them once up front.
            var normalizedSearchList = _config.SearchFor.Select(x => x.ToLowerInvariant()).ToArray();

            foreach (var link in newsLink)
            {
                var newsData          = GetData(link);
                var newsCq            = CQ.Create(newsData);
                var content           = newsCq.Select(_config.NewsContentSelector);
                var normalizedContent = string.Join("\r\n", content.Select(x => x.InnerText)).ToLowerInvariant();
                var isMatched         = normalizedSearchList.Any(x => normalizedContent.Contains(x));

                if (!isMatched)
                {
                    continue;
                }

                var outData = new OutPutData
                {
                    Content  = content.Html(),
                    Url      = link,
                    MainLink = _config.MainLink
                };
                SaveDataToFile(outData, outputDir);
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Serializes <paramref name="outData"/> to JSON and writes it to a <c>.dat</c> file in
        /// <paramref name="outputDir"/>. The file name is derived from the hash string of
        /// <c>outData.Url</c>. An already existing file is left untouched.
        /// </summary>
        /// <param name="outData">The record to persist.</param>
        /// <param name="outputDir">Directory the file is written into.</param>
        private static void SaveDataToFile(OutPutData outData, string outputDir)
        {
            var targetPath = Path.Combine(outputDir, outData.Url.GetHashString() + ".dat");

            // Only write when no file for this URL exists yet.
            if (!File.Exists(targetPath))
            {
                var json = JsonConvert.SerializeObject(outData);
                File.WriteAllText(targetPath, json);
            }
        }