/// <summary>
/// Produces the text analysis for the web page a tweet links to, reusing the files
/// cached under <paramref name="fileName"/> whenever they are available.
/// </summary>
/// <param name="fileName">
/// Base path (without extension) of the cache files: "{fileName}.txt" holds the web page and
/// "{fileName}_results.txt" holds its analysis result.
/// </param>
/// <param name="link">Address handed to the web scraper when no cached page could be loaded.</param>
/// <returns>
/// The analysis result (freshly computed or loaded from the results file), or <c>null</c>
/// when no cached page could be loaded and the download returned no page.
/// </returns>
private static async Task<TextAnalysisResult> PerformTweetLinkContentAnalysisIfNotAlreadyDoneAsync(string fileName, string link)
{
    string pageCacheFile = $"{fileName}.txt";
    string resultsCacheFile = $"{fileName}_results.txt";

    WebPage page = await LoadFromFileAsync<WebPage>(pageCacheFile);
    if (page == null)
    {
        // Potentially, each web page may need a dedicated text extractor for optimal results.
        // Here, for example, we have a customized extractor for TechCrunch/Mashable/Twitter pages.
        page = await m_WebScraper.DownloadWebPageAsync(link);
        if (page == null)
        {
            // Nothing to analyze: the page is neither cached nor downloadable.
            return null;
        }

        // Keep the downloaded page so later runs can load it from disk.
        await StoreInFileAsync(page, pageCacheFile);
    }

    // Analyze the content pointed to by the link in the tweet.
    Console.WriteLine($"\t\t{link}");

    if (File.Exists(resultsCacheFile))
    {
        // A results file already exists: load it instead of analyzing again.
        return await LoadFromFileAsync<TextAnalysisResult>(resultsCacheFile);
    }

    var analysis = await m_ContentAnalyzer.AnalyzeTextAsync(page.Text);

    // Keep the results so later runs can load them from disk.
    await StoreInFileAsync(analysis, resultsCacheFile);
    return analysis;
}
/// <summary>
/// Console entry point. Loads the configuration, sets up the services, retrieves the feed
/// items and, for each of them, analyzes the summary and (when available) the linked web
/// page, storing every intermediate artifact in a "Results" folder next to the executable.
/// </summary>
/// <param name="args">Command-line arguments; passed unchanged to <c>GetFeedItems</c>.</param>
/// <returns>A task that completes once every feed item has been processed.</returns>
public static async Task Main(string[] args)
{
    WriteHeader();

    // Get /bin folder path
    // See: https://github.com/dotnet/project-system/issues/2239
    var executableFolder = Path.GetDirectoryName(Assembly.GetEntryAssembly().Location);

    // Load configurable parameters from a json file
    // !! REMEMBER TO CONFIGURE YOUR CognitiveServices API key !!
    var configuration = LoadConfigurationFromJsonFile(Path.Combine(executableFolder, "appsettings.json"));

    // Initialize services and components. For simplicity, we do not use IoC/DI.
    SetupServices(configuration);

    // Set up the folder where analysis results are stored.
    // Directory.CreateDirectory does nothing when the folder already exists.
    string resultsPath = Path.Combine(executableFolder, "Results");
    Directory.CreateDirectory(resultsPath);

    // Returns the analysis stored in resultsFile when that file exists; otherwise analyzes
    // the text and stores the outcome in resultsFile for later runs.
    async Task<Common.Model.TextAnalysisResult> AnalyzeIfNotAlreadyDoneAsync(string text, string resultsFile)
    {
        if (File.Exists(resultsFile))
        {
            return await LoadFromFileAsync<Common.Model.TextAnalysisResult>(resultsFile);
        }

        Common.Model.TextAnalysisResult analysis = await m_ContentAnalyzer.AnalyzeTextAsync(text);
        await StoreInFileAsync(analysis, resultsFile);
        return analysis;
    }

    // Retrieve a list of feed items to analyze, either from online sources or from previously saved files
    IList<Model.FeedItem> feedItems = await GetFeedItems(args, resultsPath);

    // Analyze feed items
    foreach (var feedItem in feedItems)
    {
        var textAnalysisCollection = new List<Common.Model.TextAnalysisResult>();

        // Common prefix of every file produced for this feed item. Path.Combine is used
        // instead of a hard-coded '\' so the files land inside the results folder on every
        // platform, not only on Windows.
        string itemFilePrefix = Path.Combine(resultsPath, $"{feedItem.FeedId}_{feedItem.Id}");

        // Load from existing file or download the link pointed by feed item and perform content clean-up (ad-hoc)
        string contentFile = $"{itemFilePrefix}_content.txt";
        Common.Model.WebPage webPage = await LoadFromFileAsync<Common.Model.WebPage>(contentFile);
        if (webPage == null)
        {
            webPage = await m_WebScraper.DownloadWebPageAsync(feedItem.Link);
            if (webPage != null)
            {
                // Store web page content for later use
                await StoreInFileAsync(webPage, contentFile);
            }
        }

        // Analyze the content summary in the feed item (if not already done)
        textAnalysisCollection.Add(
            await AnalyzeIfNotAlreadyDoneAsync(feedItem.Summary, $"{itemFilePrefix}_summary_results.txt"));

        if (webPage != null)
        {
            // Analyze the content pointed by feed item (if not already done)
            // Note: this analysis may not be necessary. It seems that pointed content analysis
            // is redundant compared to summary analysis. It may happen that summary is not
            // present, so analyzing content is the only option.
            textAnalysisCollection.Add(
                await AnalyzeIfNotAlreadyDoneAsync(webPage.Text, $"{itemFilePrefix}_content_results.txt"));
        }

        // Store normalized entities for later use
        var dataSetEntry = feedItem.MapToDataSetEntry(Common.Model.DataSetEntryType.RssItem, textAnalysisCollection);
        await StoreInFileAsync(dataSetEntry, $"{itemFilePrefix}_DataSetEntry.txt");
    }
}