Exemplo n.º 1
0
        // Downloads the page at _uri, extracts its readable article text with SmartReader
        // and wraps the cleaned-up text in an Incident.
        //
        // Returns: an Incident built from (shortSummary, summary) when the article yields
        //          non-empty text; null when no text could be extracted or when any
        //          exception occurred (the exception is logged, never rethrown).
        public async Task<Incident> LoadAsync()
        {
            try
            {
                string data;

                // The handler, client and response are all IDisposable; scope them with
                // using so sockets and buffers are released as soon as the body is read.
                using (var handler = new HttpClientHandler
                {
                    AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip
                })
                using (var client = new HttpClient(handler))
                {
                    // A freshly created client has no User-Agent header, so set the default one.
                    client.DefaultRequestHeaders.UserAgent.ParseAdd("Azure Function");

                    using (var httpResponseMessage = await client.GetAsync(_uri))
                    {
                        httpResponseMessage.EnsureSuccessStatusCode();

                        // ReadAsStringAsync() fails when the server reports the charset with
                        // literal quotes ("utf-8") instead of utf-8, see
                        // https://github.com/dotnet/corefx/issues/5014.
                        // The comparison ignores case so "UTF-8" is normalised as well.
                        var contentType = httpResponseMessage.Content.Headers.ContentType;
                        if (contentType != null &&
                            string.Equals(contentType.CharSet, @"""utf-8""", StringComparison.OrdinalIgnoreCase))
                        {
                            contentType.CharSet = "UTF-8";
                        }

                        data = await httpResponseMessage.Content.ReadAsStringAsync();
                    }
                }

                data = StringSanitizer.SimplifyHtmlEncoded(data);

                var sr = new SmartReader.Reader(_uri, data);
                sr.AddCustomOperationStart(SpaceElements);
                var article = sr.GetArticle();

                // Prefer the full text content; fall back to the excerpt when it is empty.
                var content = !string.IsNullOrEmpty(article.TextContent) ? article.TextContent : article.Excerpt;

                if (!string.IsNullOrEmpty(content))
                {
                    // Short summary: hashtags and URLs removed before collapsing double spaces.
                    var shortSummary =
                        StringSanitizer.RemoveDoublespaces(
                            StringSanitizer.RemoveUrls(
                                StringSanitizer.RemoveHashtags(content)))
                        .Trim();

                    // Full summary: only double spaces collapsed.
                    var summary =
                        StringSanitizer.RemoveDoublespaces(content)
                        .Trim();

                    return new Incident(shortSummary, summary);
                }
            }
            catch (Exception e)
            {
                // Deliberate best effort: any failure is logged and reported to the caller as null.
                _logger.LogError(e, "Exception loading article");
            }

            return null;
        }
        // Produces the ShortSummary form of a text body.
        // The short summary is the shortened version of the summary that is optimized to be processed by LUIS.
        // The StringSanitizer steps run in a fixed order, each one consuming the previous
        // step's output, and the final string is trimmed before being returned.
        private string ShortSummaryCleanUp(string body)
        {
            var afterTags = StringSanitizer.RemoveHtmlTags(body);
            var afterEntities = StringSanitizer.SimplifyHtmlEncoded(afterTags);
            var afterFillers = StringSanitizer.RemoveFillerWords(afterEntities);
            var afterSpecials = StringSanitizer.RemoveSpecialCharacters(afterFillers);
            var afterSpaces = StringSanitizer.RemoveDoublespaces(afterSpecials);

            return afterSpaces.Trim();
        }