/// <summary>
/// Loads a single news item from the <c>content</c> collection by its identifier.
/// </summary>
/// <param name="id">Hexadecimal ObjectId string of the document to load.</param>
/// <returns>
/// The deserialized document when one matches; otherwise a placeholder instance whose
/// string fields are empty, whose tag list is empty and whose content type is 0. The
/// placeholder is also returned when <paramref name="id"/> is not a valid ObjectId or
/// when the lookup throws.
/// </returns>
public async Task<Models.Content.NewsContent> ContentInfo(string id)
{
    var model = new Models.Content.NewsContent()
    {
        content = string.Empty,
        contenttype = 0,
        image = string.Empty,
        lead = string.Empty,
        rotitr = string.Empty,
        rssid = string.Empty,
        tags = new List<string>(),
        titr = string.Empty,
        basecontentid = string.Empty,
        userid = string.Empty,
        _id = string.Empty,
    };

    // Validate the id before it is embedded in the command text. Only the canonical
    // 24-character hex form of a parsed ObjectId is ever concatenated, so a crafted id
    // cannot alter the command, and a malformed id no longer costs a round trip.
    ObjectId objectId;
    if (!ObjectId.TryParse(id, out objectId))
    {
        return model;
    }

    try
    {
        var response = await NoSql.Instance.RunCommandAsync<BsonDocument>(
            "{aggregate:'content',pipeline:[{$match:{_id:ObjectId('" + objectId.ToString() + "')}},{$limit:1}]}");

        var matches = response.GetValue("result").AsBsonArray;
        if (matches.Any())
        {
            model = MongoDB.Bson.Serialization.BsonSerializer.Deserialize<Models.Content.NewsContent>(
                matches[0].AsBsonDocument);
        }
    }
    catch (Exception ex)
    {
        // Best-effort lookup: callers still receive the placeholder, but the failure is
        // recorded instead of disappearing silently.
        System.Diagnostics.Trace.TraceError("ContentInfo failed for id '" + objectId.ToString() + "': " + ex);
    }

    return model;
}
/// <summary>
/// Stores a news item in the <c>content</c> collection under a newly generated ObjectId,
/// using an <c>update</c> command with <c>upsert:true</c>.
/// </summary>
/// <param name="model">
/// The item to store. A null <c>tags</c> list is replaced with an empty list on the
/// instance itself; null text fields are stored as empty strings.
/// </param>
/// <returns>
/// <c>true</c> when the command ran and its reply contains no <c>writeErrors</c>;
/// <c>false</c> when the reply reports write errors or any exception is thrown.
/// </returns>
public async Task<bool> AddContent(Models.Content.NewsContent model)
{
    try
    {
        if (model.tags == null)
        {
            model.tags = new List<string>();
        }

        var id = ObjectId.GenerateNewId();

        // Build the command as a document tree rather than by string concatenation, so
        // quotes, backslashes or braces inside crawled text cannot break or rewrite it.
        // "?? string.Empty" keeps the previous behaviour of writing '' for null strings.
        var fields = new BsonDocument
        {
            { "_id", id },
            { "userid", model.userid ?? string.Empty },
            { "contenttype", model.contenttype },
            { "rssid", model.rssid ?? string.Empty },
            { "basecontentid", model.basecontentid ?? string.Empty },
            { "titr", model.titr ?? string.Empty },
            { "rotitr", model.rotitr ?? string.Empty },
            { "lead", model.lead ?? string.Empty },
            { "content", model.content ?? string.Empty },
            { "image", model.image ?? string.Empty },
            { "tags", new BsonArray(model.tags.Select(tag => tag == null ? (BsonValue)BsonNull.Value : (BsonValue)tag)) },
            { "createdateticks", DateTime.Now.Ticks },
            { "url", model.url ?? string.Empty },
        };

        // Element order matters: the command name must be the first key.
        var command = new BsonDocument
        {
            { "update", "content" },
            {
                "updates", new BsonArray
                {
                    new BsonDocument
                    {
                        { "q", new BsonDocument("_id", id) },
                        { "u", new BsonDocument("$set", fields) },
                        { "upsert", true },
                    },
                }
            },
        };

        // The command is still handed over as text, as before; ToJson() takes care of
        // escaping every value.
        var response = await NoSql.Instance.RunCommandAsync<BsonDocument>(command.ToJson());

        // A write that the server rejected is reported in the reply rather than thrown,
        // so inspect it instead of assuming success.
        if (response != null && response.Contains("writeErrors"))
        {
            System.Diagnostics.Trace.TraceError("AddContent: server reported write errors: " + response["writeErrors"]);
            return false;
        }

        return true;
    }
    catch (Exception ex)
    {
        System.Diagnostics.Trace.TraceError("AddContent failed: " + ex);
        return false;
    }
}
/// <summary>
/// Downloads the page at <c>model.url</c>, extracts fields from it using the first
/// template of the site that owns the item's RSS feed, stores the result through
/// <c>ContentRepository.AddContent</c> and, when that succeeds, flags the base item via
/// <c>baseContentRepository.ChangeIsCrawled</c>.
/// </summary>
/// <param name="model">The base item to crawl; its url, rssid, userid and _id are read.</param>
/// <returns>
/// The result of <c>AddContent</c>; <c>false</c> when the site has no template or when
/// any step throws.
/// </returns>
public async Task<bool> Crawler(Models.BaseContent.BaseContent model)
{
    try
    {
        var uri = new Uri(model.url);

        string htmlContent;
        using (var client = new HttpClient())
        {
            client.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0");
            client.DefaultRequestHeaders.Add("Host", uri.Authority);
            using (var response = await client.GetAsync(uri))
            {
                htmlContent = await response.Content.ReadAsStringAsync();
            }
        }

        var parser = new HtmlParser();
        var document = parser.Parse(htmlContent);

        var rssModel = await ContentManagerRepository.RssInfo(model.rssid);
        var siteModel = await ContentManagerRepository.SiteInfo(rssModel.siteid);
        var template = siteModel.template.FirstOrDefault();
        if (template == null)
        {
            return false;
        }

        var content = new Models.Content.NewsContent();
        content.rssid = model.rssid;
        content.userid = model.userid;
        content.contenttype = (int)Util.Configuration.ContentType.News;
        content.basecontentid = model._id;
        content.url = model.url;

        foreach (var item in template.structure)
        {
            // Check the selector BEFORE handing it to the parser: a blank selector is
            // a template entry to skip, not a reason to abort the whole crawl.
            if (string.IsNullOrWhiteSpace(item.query))
            {
                continue;
            }

            var element = document.QuerySelector(item.query);
            if (element == null)
            {
                continue;
            }

            var elementContent = "";
            switch (item.type)
            {
                case "innerhtml":
                    elementContent = Util.Common.CleanHtmlContent(element.InnerHtml);
                    break;

                case "src":
                {
                    // Resolve relative links against the page address. An element that
                    // lacks the attribute has nothing to resolve and stays empty.
                    var source = element.GetAttribute(item.type);
                    if (!string.IsNullOrWhiteSpace(source))
                    {
                        elementContent = new Uri(uri, source).ToString();
                    }
                    break;
                }

                default:
                    break;
            }

            // Field assignment is best-effort: a template entry naming an unknown or
            // incompatible property must not discard the fields extracted so far.
            try
            {
                var property = content.GetType().GetProperty(item.field);
                if (property != null)
                {
                    property.SetValue(content, elementContent, null);
                }
            }
            catch (Exception ex)
            {
                System.Diagnostics.Trace.TraceWarning("Crawler: could not set field '" + item.field + "': " + ex.Message);
            }
        }

        var result = await ContentRepository.AddContent(content);
        if (result)
        {
            await baseContentRepository.ChangeIsCrawled(model);
        }

        return result;
    }
    catch (Exception ex)
    {
        System.Diagnostics.Trace.TraceError("Crawler failed: " + ex);
        return false;
    }
}