Esempio n. 1
0
        /// <summary>
        /// Loads a single news content document from the <c>content</c> collection by its id.
        /// </summary>
        /// <param name="id">String form of the document's ObjectId.</param>
        /// <returns>
        /// The deserialized document when one is found; otherwise a blank model
        /// (empty strings, empty tag list, <c>contenttype</c> 0). Failures while
        /// querying or deserializing are traced and also yield the blank model.
        /// </returns>
        public async Task <Models.Content.NewsContent> ContentInfo(string id)
        {
            var model = new Models.Content.NewsContent()
            {
                content       = string.Empty,
                contenttype   = 0,
                image         = string.Empty,
                lead          = string.Empty,
                rotitr        = string.Empty,
                rssid         = string.Empty,
                tags          = new List <string>(),
                titr          = string.Empty,
                basecontentid = string.Empty,
                userid        = string.Empty,
                _id           = string.Empty,
            };

            // The id is embedded in the command text below, so accept only a
            // well-formed ObjectId. Anything else (null, wrong length, quotes,
            // injected operators) cannot match a document and gets the blank model.
            ObjectId objectId;
            if (!ObjectId.TryParse(id, out objectId))
            {
                return(model);
            }

            try
            {
                // NOTE(review): this reads the legacy "result" array of the aggregate
                // reply; confirm the target server still returns it without a cursor option.
                var _model = await NoSql.Instance.RunCommandAsync <BsonDocument>("{aggregate:'content',pipeline:[{$match:{_id:ObjectId('" + objectId.ToString() + "')}},{$limit:1}]}");

                var result = _model.GetValue("result").AsBsonArray;
                if (result.Any())
                {
                    model = MongoDB.Bson.Serialization.BsonSerializer.Deserialize <Models.Content.NewsContent>(result[0].AsBsonDocument);
                }
            }
            catch (Exception ex)
            {
                // Best-effort lookup: callers still receive a model, but the failure is no longer silent.
                System.Diagnostics.Trace.TraceError("ContentInfo failed for id '" + objectId.ToString() + "': " + ex);
            }
            return(model);
        }
Esempio n. 2
0
        /// <summary>
        /// Stores a news content item in the <c>content</c> collection under a freshly
        /// generated ObjectId, using an upserting <c>update</c> command.
        /// </summary>
        /// <param name="model">
        /// The content to store. A null <c>tags</c> list is replaced with an empty one;
        /// null string fields are stored as empty strings.
        /// </param>
        /// <returns>
        /// <c>true</c> when the command ran and the reply carries no <c>writeErrors</c>;
        /// <c>false</c> when <paramref name="model"/> is null, the server reported write
        /// errors, or an exception occurred.
        /// </returns>
        public async Task <bool> AddContent(Models.Content.NewsContent model)
        {
            if (model == null)
            {
                return(false);
            }

            try
            {
                var _id = ObjectId.GenerateNewId();
                if (model.tags == null)
                {
                    model.tags = new List <string>();
                }

                // Build the command as a document rather than by string concatenation:
                // the text fields may contain quotes or braces, which would otherwise
                // break or rewrite the command.
                var fields = new BsonDocument
                {
                    { "_id", _id },
                    { "userid", model.userid ?? string.Empty },
                    { "contenttype", model.contenttype },
                    { "rssid", model.rssid ?? string.Empty },
                    { "basecontentid", model.basecontentid ?? string.Empty },
                    { "titr", model.titr ?? string.Empty },
                    { "rotitr", model.rotitr ?? string.Empty },
                    { "lead", model.lead ?? string.Empty },
                    { "content", model.content ?? string.Empty },
                    { "image", model.image ?? string.Empty },
                    { "tags", new BsonArray(model.tags) },
                    { "createdateticks", DateTime.Now.Ticks },
                    { "url", model.url ?? string.Empty }
                };

                var updateStatement = new BsonDocument
                {
                    { "q", new BsonDocument("_id", _id) },
                    { "u", new BsonDocument("$set", fields) },
                    { "upsert", true }
                };

                // "update" must stay the first key: the server reads the command name from it.
                var command = new BsonDocument
                {
                    { "update", "content" },
                    { "updates", new BsonArray { updateStatement } }
                };

                // Serialized back to text so the same string overload as before is used;
                // ToJson() takes care of escaping every value.
                var res = await NoSql.Instance.RunCommandAsync <BsonDocument>(command.ToJson());

                // The update command can reply successfully while still reporting
                // per-statement failures in "writeErrors".
                if (res != null && res.Contains("writeErrors"))
                {
                    System.Diagnostics.Trace.TraceError("AddContent: server reported write errors: " + res["writeErrors"]);
                    return(false);
                }

                return(true);
            }
            catch (Exception ex)
            {
                System.Diagnostics.Trace.TraceError("AddContent failed: " + ex);
                return(false);
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Downloads the page at <c>model.url</c>, extracts fields from it according to the
        /// first crawl template of the site that owns the item's RSS feed, stores the result
        /// through <c>ContentRepository.AddContent</c> and, on success, marks the base
        /// content as crawled.
        /// </summary>
        /// <param name="model">The base content item to crawl; its <c>url</c>, <c>rssid</c>, <c>userid</c> and <c>_id</c> are read.</param>
        /// <returns>
        /// The result of <c>AddContent</c> when a template was available; <c>false</c> when
        /// the feed, site or template cannot be resolved or any exception occurs.
        /// </returns>
        public async Task <bool> Crawler(Models.BaseContent.BaseContent model)
        {
            try
            {
                string htmlContent = "";
                var    uri         = new Uri(model.url);
                // NOTE(review): a new HttpClient per call can exhaust sockets under load;
                // consider sharing one instance across crawls.
                using (var client = new HttpClient())
                {
                    client.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0");
                    client.DefaultRequestHeaders.Add("Host", uri.Authority);
                    using (var r = await client.GetAsync(uri))
                    {
                        htmlContent = await r.Content.ReadAsStringAsync();
                    }
                }
                var parser   = new HtmlParser();
                var document = parser.Parse(htmlContent);

                var rssModel = await ContentManagerRepository.RssInfo(model.rssid);
                if (rssModel == null)
                {
                    return(false);
                }

                var SiteModel = await ContentManagerRepository.SiteInfo(rssModel.siteid);
                if (SiteModel == null || SiteModel.template == null)
                {
                    return(false);
                }

                var template = SiteModel.template.FirstOrDefault();
                if (template == null || template.structure == null)
                {
                    return(false);
                }

                var content = new Models.Content.NewsContent();
                content.rssid         = model.rssid;
                content.userid        = model.userid;
                content.contenttype   = (int)Util.Configuration.ContentType.News;
                content.basecontentid = model._id;
                content.url           = model.url;

                foreach (var item in template.structure)
                {
                    // Check the selector BEFORE querying: an empty selector would make
                    // QuerySelector throw and abort the whole crawl.
                    if (string.IsNullOrWhiteSpace(item.query))
                    {
                        continue;
                    }

                    var element = document.QuerySelector(item.query);
                    if (element == null)
                    {
                        continue;
                    }

                    var elementcontent = "";
                    switch (item.type)
                    {
                        case "innerhtml":
                            elementcontent = Util.Common.CleanHtmlContent(element.InnerHtml);
                            break;

                        case "src":
                        {
                            // Resolve the attribute against the page URL. A missing or
                            // malformed value leaves the field empty instead of failing
                            // the entire page.
                            var rawSource = element.GetAttribute(item.type);
                            Uri resolved;
                            if (!string.IsNullOrWhiteSpace(rawSource) && Uri.TryCreate(uri, rawSource, out resolved))
                            {
                                elementcontent = resolved.ToString();
                            }
                            break;
                        }

                        default:
                            break;
                    }

                    try
                    {
                        // The template names the target property; skip names that do not
                        // map to a writable, string-compatible property on the content model.
                        var property = string.IsNullOrWhiteSpace(item.field)
                            ? null
                            : content.GetType().GetProperty(item.field);

                        if (property == null || !property.CanWrite || !property.PropertyType.IsAssignableFrom(typeof(string)))
                        {
                            System.Diagnostics.Trace.TraceWarning("Crawler: template field '" + item.field + "' is not a writable string property; skipped.");
                            continue;
                        }

                        property.SetValue(content, elementcontent, null);
                    }
                    catch (Exception fieldError)
                    {
                        // One bad template entry must not discard the rest of the page.
                        System.Diagnostics.Trace.TraceWarning("Crawler: could not set field '" + item.field + "': " + fieldError);
                    }
                }

                var result = await ContentRepository.AddContent(content);

                if (result)
                {
                    await baseContentRepository.ChangeIsCrawled(model);
                }
                return(result);
            }
            catch (Exception e)
            {
                System.Diagnostics.Trace.TraceError("Crawler failed: " + e);
                return(false);
            }
        }