Beispiel #1
0
        /// <summary>
        /// Returns the text analysis of the page a tweet links to, reusing files written by
        /// earlier runs when they are available.
        /// </summary>
        /// <param name="fileName">
        /// Path prefix of the cache files: the page is kept in "{fileName}.txt" and the
        /// analysis results in "{fileName}_results.txt".
        /// </param>
        /// <param name="link">URL of the page referenced by the tweet.</param>
        /// <returns>
        /// The analysis result, or <c>null</c> when no stored page could be loaded and the
        /// download did not produce a page either.
        /// </returns>
        private static async Task<TextAnalysisResult> PerformTweetLinkContentAnalysisIfNotAlreadyDoneAsync(string fileName, string link)
        {
            string pageFile = $"{fileName}.txt";

            // Try the stored copy of the page first
            WebPage page = await LoadFromFileAsync<WebPage>(pageFile);

            if (page == null)
            {
                // Potentially, each web page may need a dedicated text extractor for optimal results
                // Here, for example, we have a customized extractor for TechCrunch/Mashable/Twitter pages
                page = await m_WebScraper.DownloadWebPageAsync(link);

                if (page == null)
                {
                    // Nothing to analyze
                    return null;
                }

                // Keep the downloaded page for later use
                await StoreInFileAsync(page, pageFile);
            }

            // Analyze the content pointed by link in the tweet
            Console.WriteLine($"\t\t{link}");

            string resultsFile = $"{fileName}_results.txt";

            // A results file from a previous run takes precedence over a new analysis
            if (File.Exists(resultsFile))
            {
                return await LoadFromFileAsync<TextAnalysisResult>(resultsFile);
            }

            var analysis = await m_ContentAnalyzer.AnalyzeTextAsync(page.Text);

            // Keep the results for later use
            await StoreInFileAsync(analysis, resultsFile);

            return analysis;
        }
Beispiel #2
0
        /// <summary>
        /// Entry point: loads the configuration, sets up the services, retrieves the feed
        /// items and, for each of them, analyzes the summary and the linked page content.
        /// Every intermediate artifact is written to the "Results" folder next to the
        /// executable so that later runs can reuse it.
        /// </summary>
        /// <param name="args">Command-line arguments, passed through to <c>GetFeedItems</c>.</param>
        /// <returns>A task that completes once every feed item has been processed.</returns>
        public static async Task Main(string[] args)
        {
            WriteHeader();

            // Get /bin folder path
            // See: https://github.com/dotnet/project-system/issues/2239
            var executableFolder = Path.GetDirectoryName(Assembly.GetEntryAssembly().Location);

            // Load configurable parameters from a json file
            // !! REMEMBER TO CONFIGURE YOUR CognitiveServices API key !!
            var configuration = LoadConfigurationFromJsonFile(Path.Combine(executableFolder, "appsettings.json"));

            // Initialize services and components. For simplicity, we do not use IoC/DI.
            SetupServices(configuration);

            // Setup a folder where to store analysis results.
            // CreateDirectory does nothing when the folder already exists, so no Exists check is needed.
            string resultsPath = Path.Combine(executableFolder, "Results");
            Directory.CreateDirectory(resultsPath);

            // Retrieve a list of feed items to analyze, either from online sources or from previous saved files
            IList<Model.FeedItem> feedItems = await GetFeedItems(args, resultsPath);

            // Analyze feed items
            foreach (var feedItem in feedItems)
            {
                var textAnalysisCollection = new List<Common.Model.TextAnalysisResult>();

                // Common prefix of every file written for this feed item. Path.Combine is used
                // instead of a hard-coded '\\' so the files end up inside the results folder on
                // every platform (a backslash is a regular file name character outside Windows).
                string filePrefix = Path.Combine(resultsPath, $"{feedItem.FeedId}_{feedItem.Id}");

                string contentFile = $"{filePrefix}_content.txt";

                // Load from existing file or download the link pointed by feed item and perform content clean-up (ad-hoc)
                Common.Model.WebPage webPage = await LoadFromFileAsync<Common.Model.WebPage>(contentFile);

                if (webPage == null)
                {
                    webPage = await m_WebScraper.DownloadWebPageAsync(feedItem.Link);

                    if (webPage != null)
                    {
                        // Store web page content for later use
                        await StoreInFileAsync(webPage, contentFile);
                    }
                }

                // Analyze the content summary in the feed item (if not already done)
                textAnalysisCollection.Add(
                    await AnalyzeOrLoadAsync(feedItem.Summary, $"{filePrefix}_summary_results.txt"));

                if (webPage != null)
                {
                    // Analyze the content pointed by feed item (if not already done)
                    // Note: this analysis may not be necessary. It seems that pointed content analysis
                    //       is redundant compared to summary analysis. It may happen that summary is not
                    //       present, so analyzing content is the only option.
                    textAnalysisCollection.Add(
                        await AnalyzeOrLoadAsync(webPage.Text, $"{filePrefix}_content_results.txt"));
                }

                // Store normalized entities for later use
                var dataSetEntry = feedItem.MapToDataSetEntry(Common.Model.DataSetEntryType.RssItem, textAnalysisCollection);
                await StoreInFileAsync(dataSetEntry, $"{filePrefix}_DataSetEntry.txt");
            }

            // Returns the analysis stored in resultsFile when that file exists; otherwise
            // analyzes the text, stores the outcome in resultsFile and returns it.
            async Task<Common.Model.TextAnalysisResult> AnalyzeOrLoadAsync(string text, string resultsFile)
            {
                if (File.Exists(resultsFile))
                {
                    return await LoadFromFileAsync<Common.Model.TextAnalysisResult>(resultsFile);
                }

                Common.Model.TextAnalysisResult result = await m_ContentAnalyzer.AnalyzeTextAsync(text);
                await StoreInFileAsync(result, resultsFile);

                return result;
            }
        }