/// <summary>
/// Downloads the page at <c>_config.MainLink</c>, follows every link selected by
/// <c>_config.NewsListSelector</c> that <c>IsNewLink</c> accepts, and saves each linked page whose
/// content (selected by <c>_config.NewsContentSelector</c>) contains at least one of the
/// <c>_config.SearchFor</c> terms, compared case-insensitively via invariant lower-casing.
/// </summary>
/// <remarks>
/// Matching pages are written into the directory "<c>_config.OutName</c>.out" under
/// <c>_mainConfig.OutputDir</c>; the directory is created when it does not exist yet.
/// </remarks>
public void Grab()
{
    var data = GetData(_config.MainLink);
    var outputDir = Path.Combine(_mainConfig.OutputDir, _config.OutName + ".out");

    // CreateDirectory does nothing when the directory already exists, so no Exists check is needed.
    Directory.CreateDirectory(outputDir);

    // The search terms do not depend on the link being processed: normalize them once up front
    // instead of once per link.
    var normalizedSearchList = _config.SearchFor.Select(x => x.ToLowerInvariant()).ToArray();

    var cq = CQ.Create(data);

    // Intentionally left lazy (no ToList/ToArray): IsNewLink is evaluated for each link right before
    // that link is processed, i.e. after earlier matches have already been saved.
    // NOTE(review): presumably IsNewLink checks outputDir for an already-saved file - confirm.
    var newsLinks = cq.Find(_config.NewsListSelector)
        .Select(x => x.GetAttribute("href"))
        // Elements matched by the selector may lack a usable href; skip them rather than handing
        // null/empty to IsNewLink and GetData.
        .Where(x => !string.IsNullOrEmpty(x))
        .Where(x => IsNewLink(x, outputDir));

    foreach (var link in newsLinks)
    {
        var newsData = GetData(link);
        var newsCq = CQ.Create(newsData);
        var content = newsCq.Select(_config.NewsContentSelector);

        // Join the text of all selected elements and lower-case it so that the comparison with the
        // (also lower-cased) search terms is case-insensitive.
        var normalizedContent = string.Join("\r\n", content.Select(x => x.InnerText)).ToLowerInvariant();
        var isMatched = normalizedSearchList.Any(x => normalizedContent.Contains(x));
        if (!isMatched)
        {
            continue;
        }

        var outData = new OutPutData
        {
            Content = content.Html(),
            Url = link,
            MainLink = _config.MainLink
        };
        SaveDataToFile(outData, outputDir);
    }
}
/// <summary>
/// Writes <paramref name="outData"/> as JSON into <paramref name="outputDir"/>, unless a file for the
/// same URL is already there.
/// </summary>
/// <param name="outData">Record to persist; its <c>Url</c> determines the file name.</param>
/// <param name="outputDir">Directory that receives the ".dat" file.</param>
private static void SaveDataToFile(OutPutData outData, string outputDir)
{
    // File name is the hash string of the URL plus the ".dat" extension.
    var targetPath = Path.Combine(outputDir, outData.Url.GetHashString() + ".dat");

    // An existing file is left untouched.
    if (!File.Exists(targetPath))
    {
        var serialized = JsonConvert.SerializeObject(outData);
        File.WriteAllText(targetPath, serialized);
    }
}