예제 #1
0
        /// <summary>
        /// Serializes <paramref name="post"/> to JSON and writes it as a new file under the data directory.
        /// </summary>
        /// <remarks>
        /// The file name is the current <c>Count</c> formatted as eight digits followed by ".json".
        /// <c>Count</c> is incremented only after the file has been written, so a failed write
        /// does not consume a number.
        /// </remarks>
        /// <param name="post">The news item to persist.</param>
        public void Save(NewsDataModel post)
        {
            string fileName   = "/" + Count.ToString("D8") + ".json";
            string targetPath = DataDirectoryInfo.FullName + fileName;

            Console.WriteLine("[" + DateTime.Now.ToShortTimeString() + "]save to: " + targetPath);

            // The file is opened (and truncated) before serialization, and closed when the using block exits.
            using (var output = new StreamWriter(targetPath, false, System.Text.Encoding.UTF8))
            {
                output.WriteLine(JsonConvert.SerializeObject(post));
            }

            Count++;
        }
예제 #2
0
        // POST api/News
        /// <summary>
        /// Stores the posted news item through <c>db.NewsData.AddOrUpdate</c>, using the combination of
        /// Poster, Message and Provider as the identifying key, and saves the change.
        /// </summary>
        /// <param name="value">The news item bound from the request.</param>
        /// <returns>
        /// 200 OK when the save succeeds; 400 Bad Request when the model state is invalid or
        /// when saving throws (the exception message is returned as the response text).
        /// </returns>
        public async Task<IHttpActionResult> Post(NewsDataModel value)
        {
            if (ModelState.IsValid)
            {
                try
                {
                    db.NewsData.AddOrUpdate(n => new { n.Poster, n.Message, n.Provider }, value);
                    await db.SaveChangesAsync();

                    return Ok();
                }
                catch (Exception saveError)
                {
                    return BadRequest(saveError.Message);
                }
            }

            return BadRequest("Model state is invalid!");
        }
예제 #3
0
        /// <summary>
        /// Collects NowNews article links for the keyword in <paramref name="conditions"/>, downloads each
        /// article and passes the parsed result to <paramref name="saver"/>.
        /// </summary>
        /// <remarks>
        /// Links come from two sources: the HTML search page (first page of results) and the JSON search
        /// API, which is called repeatedly with the id of the last post seen. Polling stops when the API
        /// returns no posts, returns fewer than a full page, the last post is not newer than the start of
        /// the window, or more than ten links have been collected.
        /// </remarks>
        /// <param name="conditions">
        /// Search keyword plus a <c>timeSpan</c> that is added to the current time to get the start of the
        /// window (presumably negative — confirm against callers).
        /// </param>
        /// <param name="saver">Receives one <see cref="NewsDataModel"/> per downloaded article.</param>
        public async Task FetchDataAsync(NowNewsConditions conditions, Saver saver)
        {
            // A page with fewer posts than this is treated as the last one.
            const int apiPageSize = 8;
            // Polling stops once more than this many links have been collected.
            const int maxLinks = 10;

            // mapper config
            var mapperConfig = new MapperConfiguration(cfg =>
            {
                cfg.CreateMap<NowNewsModel, NewsDataModel>();
            });
            var mapper = mapperConfig.CreateMapper();

            var config  = Configuration.Default;
            var context = BrowsingContext.New(config);
            var hrefs   = new List<string>();

            // Capture the window once so every post is filtered against the same bounds.
            var endDate   = DateTime.Now;
            var startDate = endDate.Add(conditions.timeSpan);

            var lastPostId = "0";

            using (var client = new HttpClient())
            {
                // First page: scrape the HTML search result.
                {
                    var pageUrl         = $"https://www.nownews.com/search?name={conditions.Keyword}";
                    var responseMessage = await client.GetAsync(pageUrl);
                    var responseResult  = await responseMessage.Content.ReadAsStringAsync();
                    var document        = await context.OpenAsync(res => res.Content(responseResult));

                    hrefs.AddRange(document.QuerySelectorAll(".news-card__title-s a").Select(x => x.GetAttribute("href")));

                    // Seed the API paging with the id exposed by the page; keep the "0" default when
                    // the element is missing instead of failing with a NullReferenceException.
                    var pageNoInput = document.QuerySelector("#txtPageNo");
                    if (pageNoInput != null)
                    {
                        lastPostId = pageNoInput.GetAttribute("value");
                    }
                }

                // Following pages: fetch through the JSON API, continuing from the last post seen.
                var keepGetApi = true;
                do
                {
                    var apiUrl          = $"https://www.nownews.com/nn-client/api/v1/client/search?name={conditions.Keyword}&id={lastPostId}";
                    var responseMessage = await client.GetAsync(apiUrl);
                    var responseResult  = await responseMessage.Content.ReadAsStringAsync();

                    var newsRespond = JsonConvert.DeserializeObject<NownewsSearchResult>(responseResult);

                    // Sort once; an empty or missing result means there is nothing further to page through.
                    var posts = newsRespond?.data?.OrderBy(x => x.sort).ToList();
                    if (posts == null || posts.Count == 0)
                    {
                        break;
                    }

                    // Links are added in the order the API returned them, not in sort order.
                    hrefs.AddRange(newsRespond.data.Where(x => x.getDate > startDate && x.getDate <= endDate)
                                   .Select(x => Domain + x.postOnlyUrl));

                    var lastPost = posts[posts.Count - 1];
                    lastPostId = lastPost.id;

                    // A full page whose last post is still inside the window means there may be more.
                    keepGetApi = posts.Count >= apiPageSize &&
                                 lastPost.getDate > startDate &&
                                 hrefs.Count <= maxLinks;
                } while (keepGetApi);

                // Download and parse each article page.
                foreach (var href in hrefs)
                {
                    var responseMessage = await client.GetAsync(href);
                    var responseResult  = await responseMessage.Content.ReadAsStringAsync();
                    var document        = await context.OpenAsync(res => res.Content(responseResult));

                    var title        = document.QuerySelector(".article-title").TextContent;
                    var contentItems = document.QuerySelector("article");

                    // Only the text nodes directly under <article> are used as the body.
                    var newsContent = contentItems.ChildNodes.Where(x => x.NodeName == "#text").Select(x => x.TextContent);

                    // When no yyyy-MM-dd date is found, postDate stays at its default value.
                    DateTime postDate;
                    string   pattern = @"[0-9]{4}-[0-9]{2}-[0-9]{2}";
                    DateTime.TryParse(Regex.Match(document.QuerySelector(".time").TextContent, pattern).Value, out postDate);

                    var model = new NewsDataModel
                    {
                        Title   = title,
                        Content = string.Join("", newsContent).Replace("\t", "").Replace("\n", "").Replace(" ", ""),
                        Date    = postDate.ToString("yyyyMMdd"),
                        Source  = href
                    };

                    // save result
                    // NOTE(review): the only configured map is NowNewsModel -> NewsDataModel, while this
                    // call maps a NewsDataModel — confirm the mapping step is still needed.
                    var result = mapper.Map<NewsDataModel>(model);
                    saver.Save(result);
                }
            }
        }
        /// <summary>
        /// Collects TTV news article links for the keyword in <paramref name="conditions"/>, downloads each
        /// article and passes the parsed result to <paramref name="saver"/>.
        /// </summary>
        /// <remarks>
        /// Search result pages are requested one after another, starting at page 1, until a page yields no
        /// entry newer than the cutoff. An article that cannot be downloaded or parsed is logged to the
        /// console and skipped; it does not stop the remaining articles from being processed.
        /// </remarks>
        /// <param name="conditions">
        /// Search keyword plus a <c>timeSpan</c> that is added to the current time to get the cutoff
        /// (presumably negative — confirm against callers).
        /// </param>
        /// <param name="saver">Receives one <see cref="NewsDataModel"/> per downloaded article.</param>
        public async Task FetchDataAsync(TTVNewsConditions conditions, Saver saver)
        {
            var config  = Configuration.Default;
            var context = BrowsingContext.New(config);

            // Computed once so every listing entry is compared against the same cutoff.
            var cutoff = DateTime.Now.Add(conditions.timeSpan);
            var hrefs  = new List<string>();

            using (var client = new HttpClient())
            {
                // get html content
                var page = 1;
                int paginationDataTotal;

                do
                {
                    var url             = $"https://news.ttv.com.tw/search/{conditions.Keyword}/{page}";
                    var responseMessage = await client.GetAsync(url);
                    var responseResult  = await responseMessage.Content.ReadAsStringAsync();
                    var document        = await context.OpenAsync(res => res.Content(responseResult));

                    // Materialize the query so each entry's date is parsed only once.
                    var paginationHrefs = document.QuerySelectorAll("li .clearfix")
                                          .Where(x =>
                                          {
                                              // Entries without a readable time are skipped rather than aborting the run.
                                              var timeNode = x.QuerySelector(".time");
                                              DateTime listedAt;
                                              return timeNode != null &&
                                                     DateTime.TryParse(timeNode.TextContent, out listedAt) &&
                                                     listedAt > cutoff;
                                          })
                                          .Select(x => PlatfromUrl + x.GetAttribute("href"))
                                          .ToList();

                    paginationDataTotal = paginationHrefs.Count;
                    hrefs.AddRange(paginationHrefs);

                    page++;
                } while (paginationDataTotal > 0);

                foreach (var href in hrefs)
                {
                    try
                    {
                        var responseMessage = await client.GetAsync(href);
                        var responseResult  = await responseMessage.Content.ReadAsStringAsync();
                        var document        = await context.OpenAsync(res => res.Content(responseResult));

                        var title = document.QuerySelector("h1").TextContent
                                    .Replace("\n", string.Empty)
                                    .Replace(" ", string.Empty);
                        var content = document.QuerySelector("#newscontent").TextContent
                                      .Replace("\n", string.Empty)
                                      .Replace(" ", string.Empty);
                        var postDate = DateTime.Parse(document.QuerySelector(".date.time").TextContent.Replace("\n", string.Empty));

                        var model = new NewsDataModel
                        {
                            Title   = title,
                            Content = content,
                            Date    = postDate.ToString("yyyyMMdd"),
                            Source  = href
                        };

                        // save result
                        saver.Save(model);
                    }
                    catch (Exception ex)
                    {
                        // Best effort: one bad article must not stop the rest, but leave a trace of it.
                        Console.WriteLine($"[{DateTime.Now.ToShortTimeString()}]skip {href}: {ex.Message}");
                    }
                }
            }
        }
예제 #5
0
 /// <summary>
 /// Creates a manager that starts out holding a new, empty <see cref="NewsDataModel"/>.
 /// </summary>
 public NewsDataManager()
 {
     var emptyModel = new NewsDataModel();
     this._newsDataModel = emptyModel;
 }
예제 #6
0
 /// <summary>
 /// Replaces the held model with the one produced by <c>JsonUtility.FromJson</c> for <paramref name="data"/>.
 /// </summary>
 /// <param name="data">JSON text describing a <see cref="NewsDataModel"/>.</param>
 public void setNewsDataModelJSON(string data)
 {
     NewsDataModel parsedModel = JsonUtility.FromJson<NewsDataModel>(data);
     this._newsDataModel = parsedModel;
 }
예제 #7
0
        // Post date layouts with an English ordinal suffix on the day, e.g. "Jan 1st 2020 5:00PM".
        private static readonly string[] Eve24PostDateFormats =
        {
            "MMM d'st' yyyy h:mmtt",
            "MMM d'nd' yyyy h:mmtt",
            "MMM d'rd' yyyy h:mmtt",
            "MMM d'th' yyyy h:mmtt"
        };

        /// <summary>
        /// Parses a post date, trying the ordinal-suffix layouts first and falling back to general parsing.
        /// </summary>
        /// <param name="text">The raw date text taken from the page.</param>
        /// <param name="postDate">The parsed date, or <see cref="DateTime.MinValue"/> when parsing fails.</param>
        /// <returns><c>true</c> when any of the attempts succeeded.</returns>
        private static bool TryParseEve24PostDate(string text, out DateTime postDate)
        {
            if (DateTime.TryParseExact(text, Eve24PostDateFormats, CultureInfo.InvariantCulture, DateTimeStyles.None, out postDate))
            {
                return true;
            }

            // Fallback for text without an ordinal suffix. The result of this attempt used to be
            // thrown away because a later exact parse always overwrote it.
            return DateTime.TryParse(text, out postDate);
        }

        /// <summary>
        /// Downloads the evenews24.com front page and extracts one <see cref="NewsDataModel"/> per news entry.
        /// </summary>
        /// <remarks>
        /// Entries are the <c>div</c> elements whose class attribute contains "hentry clearfix" and whose
        /// second child node is an <c>h2</c>. Every field is read from a fixed child-node position; a field
        /// whose node does not have the expected shape is left unset. The HTTP calls block the calling
        /// thread because this method is synchronous.
        /// </remarks>
        /// <returns>The parsed entries; an empty list when the page has no matching nodes.</returns>
        public List<NewsDataModel> GetNewsEve24()
        {
            List<NewsDataModel> result = new List<NewsDataModel>();

            using (var hc = new HttpClient())
            using (HttpResponseMessage response = hc.GetAsync("http://evenews24.com/").Result)
            using (Stream stream = response.Content.ReadAsStreamAsync().Result)
            {
                HtmlDocument _WebDocument = new HtmlDocument();
                _WebDocument.Load(stream);

                HtmlNodeCollection _webnodes = _WebDocument.DocumentNode.SelectNodes("//div[contains(@class,'hentry clearfix')]");

                // HtmlAgilityPack's SelectNodes returns null, not an empty collection, when nothing matches.
                if (_webnodes == null)
                {
                    return result;
                }

                foreach (HtmlNode _node in _webnodes)
                {
                    if (_node == null || _node.ChildNodes == null || _node.ChildNodes.Count < 10)
                    {
                        continue;
                    }

                    HtmlNode titleNode = _node.ChildNodes[1];
                    if (titleNode.Name != "h2") // Only an h2 implies a proper news title
                    {
                        continue;
                    }

                    NewsDataModel _newsItem = new NewsDataModel();

                    // Get title
                    _newsItem.Title = titleNode.InnerText;

                    // Get Post Author and Date
                    HtmlNode metaNode = _node.ChildNodes[3];
                    if (metaNode.Name == "div" && metaNode.ChildNodes != null && metaNode.ChildNodes.Count >= 4)
                    {
                        HtmlNode authorNode = metaNode.ChildNodes[1];
                        if (authorNode.Name == "span")
                        {
                            _newsItem.PostAuthor = authorNode.InnerText;
                        }

                        if (authorNode.ChildNodes != null && authorNode.ChildNodes.Count > 0)
                        {
                            HtmlNode authorLink = authorNode.ChildNodes[0];
                            if (authorLink.Name == "a" &&
                                authorLink.Attributes != null &&
                                authorLink.Attributes.Count >= 2 &&
                                authorLink.Attributes[1].Name == "href")
                            {
                                _newsItem.PostAuthorUrl = authorLink.Attributes[1].Value;
                            }
                        }

                        HtmlNode dateNode = metaNode.ChildNodes[3];
                        if (dateNode.Name == "span")
                        {
                            DateTime postDate;
                            if (TryParseEve24PostDate(dateNode.InnerText, out postDate) && postDate != DateTime.MinValue)
                            {
                                _newsItem.PostDate = postDate;
                            }
                        }
                    }

                    // Get image
                    HtmlNode imageContainer = _node.ChildNodes[5];
                    if (imageContainer.Name == "div" && imageContainer.ChildNodes != null && imageContainer.ChildNodes.Count >= 4)
                    {
                        HtmlNode imageNode = imageContainer.ChildNodes[3];
                        if (imageNode.Name == "img" && imageNode.Attributes.Count > 0 && imageNode.Attributes[0].Name == "src")
                        {
                            _newsItem.ImgSrc = imageNode.Attributes[0].Value;
                        }
                    }

                    // Get summary
                    HtmlNode summaryNode = _node.ChildNodes[7];
                    if (summaryNode.Name == "div")
                    {
                        _newsItem.Summary = summaryNode.InnerText;
                    }

                    // Get Url
                    HtmlNode linkContainer = _node.ChildNodes[9];
                    if (linkContainer.Name == "div" && linkContainer.ChildNodes != null && linkContainer.ChildNodes.Count >= 2)
                    {
                        HtmlNode linkNode = linkContainer.ChildNodes[1];
                        if (linkNode.Name == "a" &&
                            linkNode.Attributes != null &&
                            linkNode.Attributes.Count >= 1 &&
                            linkNode.Attributes[0].Name == "href")
                        {
                            _newsItem.PostUrl = linkNode.Attributes[0].Value;
                        }
                    }

                    // Get Description
                    // TODO: Fix this later. The disabled draft fetched PostUrl, selected the divs whose class
                    // contains 'entry-content', and joined the text of their <p> children (falling back to the
                    // <p> children of a nested 'featured-img' div) into Description.

                    result.Add(_newsItem);
                }
            }

            return result;
        }