示例#1
0
        /// <summary>
        /// Counts, for every article returned by <c>MySQLHandler.GetAllArticlesAsync</c>, how often
        /// each of the first 1000 entries of <c>_intermediateWordContainer</c> occurs in the article
        /// text, and stores the per-article counts through <c>MongoDBHandler</c> in batches.
        /// Previously stored article/word occurrences are cleared first.
        /// </summary>
        /// <remarks>
        /// The counting runs on the thread pool via an awaited <c>Task.Run</c>, so the handler only
        /// completes when all batches are written and any failure propagates through the awaited
        /// task instead of being raised on an unobserved background thread.
        /// </remarks>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private async void button3_Click(object sender, EventArgs e)
        {
            // Number of articles whose counts are buffered before they are flushed to storage.
            const int articlesPerBatch = 1000;

            var wordsWithOccurences = _intermediateWordContainer;

            var articles = await MySQLHandler.GetAllArticlesAsync();

            // Only the first 1000 words of the container are evaluated.
            var wordsToProcess = wordsWithOccurences.Take(1000).ToList();

            await MongoDBHandler.ClearArticleWordOccurences();

            // Task.Run(Func<Task>) tracks the async lambda to completion; a raw Thread would
            // treat it as "async void" and finish at the first await.
            await System.Threading.Tasks.Task.Run(async () =>
            {
                var pendingOccurences = new List<ArticleWordOccurence>();
                var articlesInBatch = 0;

                foreach (var article in articles)
                {
                    foreach (var word in wordsToProcess)
                    {
                        pendingOccurences.Add(new ArticleWordOccurence
                        {
                            ArticleId = article.Id,
                            Word      = word.WordName,
                            Occurence = GeneralHandlers.CountStringOccurrences(article.Text, word.WordName)
                        });
                    }

                    articlesInBatch++;

                    if (articlesInBatch >= articlesPerBatch)
                    {
                        // Skip empty flushes (e.g. no words to process); the storage layer may reject them.
                        if (pendingOccurences.Count > 0)
                        {
                            await MongoDBHandler.InsertArticleWordOccurences(pendingOccurences);
                            pendingOccurences.Clear();
                        }

                        articlesInBatch = 0;
                    }
                }

                // Flush whatever is left from the last, partially filled batch.
                if (pendingOccurences.Count > 0)
                {
                    await MongoDBHandler.InsertArticleWordOccurences(pendingOccurences);
                }
            });
        }
        /// <summary>
        /// Builds the final article data set: loads articles with metadata from MySQL, maps them to
        /// <c>Article</c> objects, derives image/video/iframe counts and the banner flag from the
        /// article text, flags articles that are referenced by Facebook Graph posts, and saves the
        /// result through <c>MongoDBHandler.InsertFinalArticleData</c>.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private async void button1_Click(object sender, EventArgs e)
        {
            // Get articles from the MySQL database, filled with metadata.
            var sqlArticles = await MySQLHandler.GetAllArticlesWithMetadata();

            // Map the SQL article data structure to the full featured data set.
            var articles = new List<Article>();

            foreach (var sqlArticle in sqlArticles)
            {
                var article = new Article
                {
                    Id    = sqlArticle.Id,
                    Title = sqlArticle.Title,
                    Text  = sqlArticle.Text,
                    Views = sqlArticle.Views,
                    MainArticleCategory = sqlArticle.MainArticleCategory,
                    ArticleType         = sqlArticle.NewsType,
                    Tags                = ParseCommaSeparatedTagIds(sqlArticle.Tags),
                    VideoFocusedArticle = !string.IsNullOrEmpty(sqlArticle.VideoId),
                    ActivePeriod        = StaticGlobals.PointOfMeasure - sqlArticle.InsertTime
                };

                if (GeneralHandlers.IsValidJson(sqlArticle.AssocNewsMetadata))
                {
                    var p_author = JObject.Parse(sqlArticle.AssocNewsMetadata)["p_author"];
                    article.Author = p_author == null ? string.Empty : p_author.ToString();
                }

                articles.Add(article);
            }

            // Retrieve FB Graph article data to fill the Facebook related fields.
            var fbGraphArticleData = await MongoDBHandler.GetFbGraphArticles();

            // Keep only posts that link to a news article and index them by the numeric article id
            // found at the end of the link. The fetched objects themselves are not modified.
            var fbPostsByArticleId = fbGraphArticleData
                .Where(a => !string.IsNullOrEmpty(a.data_link) &&
                            a.data_link.IndexOf("crystalsport.ge/news/", StringComparison.Ordinal) >= 0)
                .Select(a => new { Post = a, ArticleId = ExtractArticleIdFromFbLink(a.data_link) })
                .Where(x => x.ArticleId != null)
                .ToLookup(x => x.ArticleId, x => x.Post, StringComparer.Ordinal);

            foreach (var article in articles)
            {
                // A missing text is treated as empty so the checks below cannot fail on null.
                var text = article.Text ?? string.Empty;

                // 1. Amount of images in the article
                article.ImageCount = GeneralHandlers.CountStringOccurrences(text, "<img ");

                // 2. Amount of videos in the article
                article.VideoCount = GeneralHandlers.CountStringOccurrences(text, "<video ");

                // 3. Amount of iframes in the article
                article.IframeCount = GeneralHandlers.CountStringOccurrences(text, "<iframe ");

                // 4. Is there a promotional banner placeholder within the article's text?
                article.BannerInsideArticle = text.Contains("[ArticleBannerPlaceholder]");

                // 5. Use the Graph API posts that link to this article to fill the Facebook fields.
                //    Indexing the lookup with an unknown id yields an empty sequence.
                var fbPosts = fbPostsByArticleId[article.Id.ToString()];

                article.PublishedOnSocialMedia = fbPosts.Any();

                article.BoostedOnSocialMedia =
                    fbPosts.Any(a => a.attributes_with_metadata[0].value.is_enable == true);
            }

            // Save the final processed article data to MongoDB.
            await MongoDBHandler.InsertFinalArticleData(articles);
        }

        /// <summary>
        /// Converts a comma separated list of tag ids into <c>Tag</c> objects carrying only the id.
        /// </summary>
        /// <param name="tags">Comma separated tag ids; may be null or empty.</param>
        /// <returns>One <c>Tag</c> per non-empty segment; an empty list for null/empty input.</returns>
        /// <exception cref="FormatException">A non-empty segment is not a valid integer.</exception>
        private static List<Tag> ParseCommaSeparatedTagIds(string tags)
        {
            var result = new List<Tag>();

            if (string.IsNullOrEmpty(tags))
            {
                return result;
            }

            foreach (var rawId in tags.Split(','))
            {
                var trimmedId = rawId.Trim();

                // Tolerate stray separators such as a trailing comma.
                if (trimmedId.Length == 0)
                {
                    continue;
                }

                result.Add(new Tag
                {
                    Id = int.Parse(trimmedId, System.Globalization.CultureInfo.InvariantCulture)
                });
            }

            return result;
        }

        /// <summary>
        /// Extracts the numeric article id from the last path segment of a Facebook post link.
        /// </summary>
        /// <param name="link">Non-null link text.</param>
        /// <returns>The digits-only last path segment, or null when that segment is not numeric.</returns>
        private static string ExtractArticleIdFromFbLink(string link)
        {
            // Cut the query string / fragment first so that a '/' inside them is not
            // mistaken for a path separator.
            var cutIndex = link.IndexOfAny(new[] { '?', '#' });
            var path = cutIndex >= 0 ? link.Substring(0, cutIndex) : link;

            var lastSegment = path.TrimEnd('/').Split('/').Last();

            return Regex.IsMatch(lastSegment, @"^\d+$") ? lastSegment : null;
        }