/// <summary>
/// Serializes <paramref name="post"/> to JSON, writes it to a new file in the data directory
/// named after the current value of <c>Count</c>, and then increments <c>Count</c>.
/// </summary>
/// <param name="post">The news item to write out.</param>
public void Save(NewsDataModel post)
{
    // The file name is the counter zero-padded to eight digits, with a leading slash.
    string relativeName = "/" + Count.ToString("D8") + ".json";
    string targetPath = DataDirectoryInfo.FullName + relativeName;

    Console.WriteLine("[" + DateTime.Now.ToShortTimeString() + "]save to: " + targetPath);

    // append: false overwrites an existing file of the same name; disposing the writer closes it.
    using (StreamWriter output = new StreamWriter(targetPath, false, System.Text.Encoding.UTF8))
    {
        output.WriteLine(JsonConvert.SerializeObject(post));
    }

    Count++;
}
// POST api/News
/// <summary>
/// Stores the posted news item via <c>db.NewsData.AddOrUpdate</c>, passing
/// (Poster, Message, Provider) as the identifier expression, and saves the change.
/// </summary>
/// <param name="value">The news item taken from the request.</param>
/// <returns>
/// <c>Ok()</c> after a successful save; <c>BadRequest</c> when the model state is invalid
/// or when saving throws (the exception message is returned to the caller).
/// </returns>
public async Task<IHttpActionResult> Post(NewsDataModel value)
{
    if (ModelState.IsValid)
    {
        try
        {
            db.NewsData.AddOrUpdate(item => new { item.Poster, item.Message, item.Provider }, value);
            await db.SaveChangesAsync();
            return Ok();
        }
        catch (Exception error)
        {
            return BadRequest(error.Message);
        }
    }

    return BadRequest("Model state is invalid!");
}
/// <summary>
/// Searches nownews.com for <c>conditions.Keyword</c>, collects article links from the first
/// search page and from the search API, then downloads every collected article and passes it
/// to <paramref name="saver"/>.
/// </summary>
/// <param name="conditions">
/// Supplies the search keyword and <c>timeSpan</c>, which is added to the current time to get
/// the earliest publish date accepted from the API results.
/// </param>
/// <param name="saver">Receives one <see cref="NewsDataModel"/> per downloaded article.</param>
/// <returns>A task that completes when every collected article has been saved.</returns>
public async Task FetchDataAsync(NowNewsConditions conditions, Saver saver)
{
    // Mapper config
    var mapperConfig = new MapperConfiguration(cfg => { cfg.CreateMap<NowNewsModel, NewsDataModel>(); });
    var mapper = mapperConfig.CreateMapper();

    var config = Configuration.Default;
    var context = BrowsingContext.New(config);

    var hrefs = new List<string>();
    var startDate = DateTime.Now.Add(conditions.timeSpan);

    // Escape the keyword so characters such as '&', '#' or spaces cannot break the query string.
    var keyword = Uri.EscapeDataString(conditions.Keyword ?? string.Empty);

    var lastPostId = "0";
    var keepGetApi = true;

    // The client is disposed once all requests of this run are done.
    using (var client = new HttpClient())
    {
        // First page: scrape the HTML search result.
        {
            var pageUrl = $"https://www.nownews.com/search?name={keyword}";
            var responseMessage = await client.GetAsync(pageUrl);
            var responseResult = await responseMessage.Content.ReadAsStringAsync();
            var document = await context.OpenAsync(res => res.Content(responseResult));

            hrefs.AddRange(document.QuerySelectorAll(".news-card__title-s a").Select(x => x.GetAttribute("href")));

            // The value of #txtPageNo is used as the id for the first API request.
            // If the element or its value is missing, keep the initial "0" instead of crashing.
            var pageNoElement = document.QuerySelector("#txtPageNo");
            if (pageNoElement != null)
            {
                var pageNoValue = pageNoElement.GetAttribute("value");
                if (!string.IsNullOrEmpty(pageNoValue))
                {
                    lastPostId = pageNoValue;
                }
            }
        }

        // Following pages: read the search API, continuing from the last post id seen.
        do
        {
            var apiUrl = $"https://www.nownews.com/nn-client/api/v1/client/search?name={keyword}&id={lastPostId}";
            var responseMessage = await client.GetAsync(apiUrl);
            var responseResult = await responseMessage.Content.ReadAsStringAsync();
            var newsRespond = JsonConvert.DeserializeObject<NownewsSearchResult>(responseResult);

            // Nothing returned: stop here, because Last() below throws on an empty sequence.
            if (newsRespond == null || newsRespond.data == null || !newsRespond.data.Any())
            {
                break;
            }

            hrefs.AddRange(newsRespond.data
                .Where(x => x.getDate > startDate && x.getDate <= DateTime.Now)
                .Select(x => Domain + x.postOnlyUrl));

            var lastPost = newsRespond.data.OrderBy(x => x.sort).Last();
            lastPostId = lastPost.id;

            // Ask for another page only when this one held at least 8 posts, its last post is
            // still newer than startDate, and no more than 10 links have been collected so far.
            keepGetApi = newsRespond.data.Count() >= 8
                && lastPost.getDate > startDate
                && hrefs.Count <= 10;
        } while (keepGetApi);

        // Download and parse each article page.
        foreach (var href in hrefs)
        {
            var responseMessage = await client.GetAsync(href);
            var responseResult = await responseMessage.Content.ReadAsStringAsync();
            var document = await context.OpenAsync(res => res.Content(responseResult));

            var title = document.QuerySelector(".article-title").TextContent;
            var contentItems = document.QuerySelector("article");

            // Only the direct text nodes of <article> are used as the body text.
            var newsContent = contentItems.ChildNodes
                .Where(x => x.NodeName == "#text")
                .Select(x => x.TextContent);

            // If no yyyy-MM-dd date is found, postDate stays default(DateTime) and Date becomes "00010101".
            DateTime postDate;
            string pattern = @"[0-9]{4}-[0-9]{2}-[0-9]{2}";
            DateTime.TryParse(Regex.Match(document.QuerySelector(".time").TextContent, pattern).Value, out postDate);

            var model = new NewsDataModel
            {
                Title = title,
                Content = string.Join("", newsContent).Replace("\t", "").Replace("\n", "").Replace(" ", ""),
                Date = postDate.ToString("yyyyMMdd"),
                Source = href
            };

            // Save result
            var result = mapper.Map<NewsDataModel>(model);
            saver.Save(result);
        }
    }
}
/// <summary>
/// Walks the news.ttv.com.tw search result pages for <c>conditions.Keyword</c>, collects the
/// links of entries newer than the cutoff, then downloads each article and passes it to
/// <paramref name="saver"/>.
/// </summary>
/// <param name="conditions">
/// Supplies the search keyword and <c>timeSpan</c>, which is added to the current time to get
/// the cutoff an entry's time must exceed.
/// </param>
/// <param name="saver">Receives one <see cref="NewsDataModel"/> per successfully parsed article.</param>
/// <returns>A task that completes when every collected link has been processed.</returns>
public async Task FetchDataAsync(TTVNewsConditions conditions, Saver saver)
{
    var config = Configuration.Default;
    var context = BrowsingContext.New(config);

    // Escape the keyword so characters such as '/', '?' or '#' cannot change the URL path.
    var keyword = Uri.EscapeDataString(conditions.Keyword ?? string.Empty);
    var cutoff = DateTime.Now.Add(conditions.timeSpan);
    var hrefs = new List<string>();

    // The client is disposed once all requests of this run are done.
    using (var client = new HttpClient())
    {
        // Collect links page by page until a page yields no entry newer than the cutoff.
        var page = 1;
        var paginationDataTotal = 0;
        do
        {
            var url = $"https://news.ttv.com.tw/search/{keyword}/{page}";
            var responseMessage = await client.GetAsync(url);
            var responseResult = await responseMessage.Content.ReadAsStringAsync();
            var document = await context.OpenAsync(res => res.Content(responseResult));

            // Materialize once: the query is both counted and appended, and a deferred
            // query would run the selector lookups and DateTime.Parse twice.
            var paginationHrefs = document.QuerySelectorAll("li .clearfix")
                .Where(x => DateTime.Parse(x.QuerySelector(".time").TextContent) > cutoff)
                .Select(x => PlatfromUrl + x.GetAttribute("href"))
                .ToList();

            paginationDataTotal = paginationHrefs.Count;
            hrefs.AddRange(paginationHrefs);
            page++;
        } while (paginationDataTotal > 0);

        foreach (var href in hrefs)
        {
            try
            {
                var responseMessage = await client.GetAsync(href);
                var responseResult = await responseMessage.Content.ReadAsStringAsync();
                var document = await context.OpenAsync(res => res.Content(responseResult));

                var title = document.QuerySelector("h1").TextContent
                    .Replace("\n", string.Empty)
                    .Replace(" ", string.Empty);
                var content = document.QuerySelector("#newscontent").TextContent
                    .Replace("\n", string.Empty)
                    .Replace(" ", string.Empty);
                var postDate = DateTime.Parse(document.QuerySelector(".date.time").TextContent.Replace("\n", string.Empty));

                var model = new NewsDataModel
                {
                    Title = title,
                    Content = content,
                    Date = postDate.ToString("yyyyMMdd"),
                    Source = href
                };

                // Save result
                saver.Save(model);
            }
            catch (Exception ex)
            {
                // Best effort: one broken article must not stop the rest, but the failure is reported.
                Console.Error.WriteLine($"Failed to fetch or parse {href}: {ex.Message}");
            }
        }
    }
}
/// <summary>
/// Creates the manager with a newly constructed <see cref="NewsDataModel"/> as its current model.
/// </summary>
public NewsDataManager()
{
    this._newsDataModel = new NewsDataModel();
}
/// <summary>
/// Replaces the current model with the result of <c>JsonUtility.FromJson</c> applied to <paramref name="data"/>.
/// </summary>
/// <param name="data">JSON text to deserialize into a <see cref="NewsDataModel"/>.</param>
public void setNewsDataModelJSON(string data)
{
    NewsDataModel parsedModel = JsonUtility.FromJson<NewsDataModel>(data);
    this._newsDataModel = parsedModel;
}
/// <summary>
/// Downloads the evenews24.com front page and builds one <see cref="NewsDataModel"/> for every
/// "hentry clearfix" div whose second child is an <c>h2</c> title. Fields are read from fixed
/// child positions; a field is left unset when the expected element is not at its position.
/// </summary>
/// <returns>The extracted items; an empty list when no matching div is found.</returns>
public List<NewsDataModel> GetNewsEve24()
{
    List<NewsDataModel> result = new List<NewsDataModel>();

    // NOTE: .Result blocks the calling thread; kept because this method's signature is synchronous.
    using (var hc = new HttpClient())
    using (HttpResponseMessage response = hc.GetAsync("http://evenews24.com/").Result)
    using (Stream stream = response.Content.ReadAsStreamAsync().Result)
    {
        HtmlDocument webDocument = new HtmlDocument();
        webDocument.Load(stream);

        HtmlNodeCollection postNodes = webDocument.DocumentNode.SelectNodes("//div[contains(@class,'hentry clearfix')]");

        // SelectNodes returns null (not an empty collection) when nothing matches.
        if (postNodes == null)
        {
            return result;
        }

        foreach (HtmlNode node in postNodes)
        {
            if (node == null || node.ChildNodes == null || node.ChildNodes.Count < 10)
            {
                continue;
            }

            // An h2 at position 1 implies a proper news title; anything else is skipped.
            HtmlNode titleNode = node.ChildNodes[1];
            if (titleNode.Name != "h2")
            {
                continue;
            }

            NewsDataModel newsItem = new NewsDataModel();

            // Get title
            newsItem.Title = titleNode.InnerText;

            // Get post author and date
            HtmlNode metaNode = node.ChildNodes[3];
            if (metaNode.Name == "div" && metaNode.ChildNodes != null && metaNode.ChildNodes.Count >= 4)
            {
                HtmlNode authorNode = metaNode.ChildNodes[1];
                if (authorNode.Name == "span")
                {
                    newsItem.PostAuthor = authorNode.InnerText;
                }

                if (authorNode.ChildNodes != null && authorNode.ChildNodes.Count > 0)
                {
                    // The author link's href is expected to be the second attribute of the <a>.
                    HtmlNode authorLink = authorNode.ChildNodes[0];
                    if (authorLink.Name == "a"
                        && authorLink.Attributes != null
                        && authorLink.Attributes.Count >= 2
                        && authorLink.Attributes[1].Name == "href")
                    {
                        newsItem.PostAuthorUrl = authorLink.Attributes[1].Value;
                    }
                }

                HtmlNode dateNode = metaNode.ChildNodes[3];
                if (dateNode.Name == "span")
                {
                    DateTime postDate;
                    if (TryParseEve24PostDate(dateNode.InnerText, out postDate))
                    {
                        newsItem.PostDate = postDate;
                    }
                }
            }

            // Get image
            HtmlNode imageWrapper = node.ChildNodes[5];
            if (imageWrapper.Name == "div" && imageWrapper.ChildNodes != null && imageWrapper.ChildNodes.Count >= 4)
            {
                HtmlNode imageNode = imageWrapper.ChildNodes[3];
                if (imageNode.Name == "img"
                    && imageNode.Attributes != null
                    && imageNode.Attributes.Count > 0
                    && imageNode.Attributes[0].Name == "src")
                {
                    newsItem.ImgSrc = imageNode.Attributes[0].Value;
                }
            }

            // Get summary
            HtmlNode summaryNode = node.ChildNodes[7];
            if (summaryNode.Name == "div")
            {
                newsItem.Summary = summaryNode.InnerText;
            }

            // Get url
            HtmlNode linkWrapper = node.ChildNodes[9];
            if (linkWrapper.Name == "div" && linkWrapper.ChildNodes != null && linkWrapper.ChildNodes.Count >= 2)
            {
                HtmlNode postLink = linkWrapper.ChildNodes[1];
                if (postLink.Name == "a"
                    && postLink.Attributes != null
                    && postLink.Attributes.Count >= 1
                    && postLink.Attributes[0].Name == "href")
                {
                    newsItem.PostUrl = postLink.Attributes[0].Value;
                }
            }

            // TODO: Description is not populated yet. The earlier draft fetched PostUrl and joined the
            // <p> texts found under div.entry-content (or its nested div.featured-img).

            result.Add(newsItem);
        }
    }

    return result;
}

/// <summary>
/// Parses a post date written with an English ordinal day suffix (st, nd, rd or th),
/// falling back to general date parsing when none of those formats match.
/// </summary>
/// <param name="text">The date text taken from the page.</param>
/// <param name="postDate">The parsed date; <see cref="DateTime.MinValue"/> when parsing fails.</param>
/// <returns><c>true</c> when a date other than <see cref="DateTime.MinValue"/> was parsed.</returns>
private static bool TryParseEve24PostDate(string text, out DateTime postDate)
{
    string[] ordinalFormats =
    {
        "MMM d'st' yyyy h:mmtt",
        "MMM d'nd' yyyy h:mmtt",
        "MMM d'rd' yyyy h:mmtt",
        "MMM d'th' yyyy h:mmtt"
    };

    if (DateTime.TryParseExact(text, ordinalFormats, CultureInfo.InvariantCulture, DateTimeStyles.None, out postDate)
        && postDate != DateTime.MinValue)
    {
        return true;
    }

    // The general parse runs last so that its result is not overwritten by a failed exact parse.
    return DateTime.TryParse(text, CultureInfo.InvariantCulture, DateTimeStyles.None, out postDate)
        && postDate != DateTime.MinValue;
}