Exemple #1
0
        /// <summary>
        /// Submits the given sentences to the Text Analytics entity endpoint and groups the
        /// recognized entity names by document.
        /// </summary>
        /// <param name="lstSentence">Sentences to analyze. Each one is sent as an English ("en")
        /// document whose ID is its zero-based position in the list.</param>
        /// <returns>A dictionary keyed by document ID. Each value holds one pair per recognized
        /// entity, with the document ID as the pair's key and the entity name as its value.</returns>
        /// <remarks>
        /// The call blocks on the asynchronous SDK method via <c>.Result</c>, so service failures
        /// surface wrapped in an <see cref="AggregateException"/>.
        /// </remarks>
        public static Dictionary<string, List<KeyValuePair<string, string>>> IdentifyEntities(List<string> lstSentence)
        {
            var response = new Dictionary<string, List<KeyValuePair<string, string>>>();

            var inputList = lstSentence.Select((l, i) => new MultiLanguageInput("en", i.ToString(), l)).ToList();

            // The client is disposable; release it once the request has completed instead of
            // leaking one instance per call.
            // Replace 'westus' with the correct region for your Text Analytics subscription.
            using (ITextAnalyticsClient client = new TextAnalyticsClient(new ApiKeyServiceClientCredentials())
            {
                Endpoint = "https://westus.api.cognitive.microsoft.com"
            })
            {
                EntitiesBatchResult result = client.EntitiesAsync(new MultiLanguageBatchInput(inputList)).Result;

                // Collect the entity names of every returned document.
                foreach (var document in result.Documents)
                {
                    var entities = new List<KeyValuePair<string, string>>();
                    foreach (EntityRecord entity in document.Entities)
                    {
                        entities.Add(new KeyValuePair<string, string>(document.Id, entity.Name));
                    }
                    response.Add(document.Id, entities);
                }
            }

            return response;
        }
Exemple #2
0
        /// <summary>
        /// Runs entity recognition over a single piece of text and returns the entities as a
        /// name/value collection keyed by entity type.
        /// </summary>
        /// <param name="client">Text Analytics client used to issue the request.</param>
        /// <param name="content">Text to analyze; submitted as a single document with ID "1".</param>
        /// <param name="language">Language code sent with the document. Defaults to "en".</param>
        /// <returns>
        /// A collection mapping entity type to entity name, or an empty collection when the service
        /// returns no documents. Assignment goes through the indexer, so when several entities
        /// share a type only the last one is kept.
        /// </returns>
        public static async Task<NameValueCollection> EntitiesInContent(TextAnalyticsClient client, string content, string language = "en")
        {
            var inputDocuments = new MultiLanguageBatchInput
            {
                Documents = new List<MultiLanguageInput>
                {
                    // Honor the caller-supplied language instead of always sending "en".
                    new MultiLanguageInput(id: "1", text: content, language: language)
                }
            };

            var result = await client.EntitiesAsync(false, inputDocuments);

            var document = result.Documents.FirstOrDefault();
            if (document == null)
            {
                return new NameValueCollection();
            }

            // ParseQueryString(string.Empty) yields an empty collection of the HttpUtility-specific
            // NameValueCollection subtype; the original return type is kept for callers.
            var entities = System.Web.HttpUtility.ParseQueryString(string.Empty);

            foreach (var entity in document.Entities)
            {
                entities[entity.Type] = entity.Name;
            }

            return entities;
        }
Exemple #3
0
                    /// <summary>
                    /// Recognizes entities in <paramref name="text"/> and appends one
                    /// "Name[Type]" entry per entity to <c>Entity</c>.
                    /// </summary>
                    /// <param name="endpoint">Text Analytics endpoint URL.</param>
                    /// <param name="key">Subscription key used to build the client credentials.</param>
                    /// <param name="text">Text to analyze; submitted as one English document with ID "1".</param>
                    public async Task RunAsync(string endpoint, string key, string text)
                    {
                        var credentials = new ApiKeyServiceClientCredentials(key);

                        // The documents to be submitted for entity recognition. The ID can be any value.
                        var inputDocuments = new MultiLanguageBatchInput(
                            new List<MultiLanguageInput>
                            {
                                new MultiLanguageInput("en", "1", text)
                            });

                        // Dispose the client once the request is done so its resources are
                        // not leaked on every call.
                        using (var client = new TextAnalyticsClient(credentials) { Endpoint = endpoint })
                        {
                            var entitiesResult = await client.EntitiesAsync(false, inputDocuments);

                            foreach (var document in entitiesResult.Documents)
                            {
                                foreach (var entity in document.Entities)
                                {
                                    // Entities without a type are reported as "N/A".
                                    Entity.Add($"{entity.Name}[{entity.Type ?? "N/A"}]");
                                }
                            }
                        }
                    }
        /// <summary>
        /// Service Bus-triggered function: deserializes the incoming review message, extracts
        /// entities from its <c>verbatim</c> text and uploads them as JSON to the blob named
        /// "&lt;RequestCorrelationId&gt;.json" in the bound container.
        /// </summary>
        /// <param name="topicMessage">JSON payload of the Service Bus message.</param>
        /// <param name="log">Function logger.</param>
        /// <param name="blobContainer">Container that receives the entity JSON.</param>
        /// <remarks>
        /// Upload failures are logged and swallowed, as before. Failures of the entity request
        /// itself still propagate to the Functions host.
        /// </remarks>
        public static void Run(
            [ServiceBusTrigger("newreview", "entity", Connection = "topicConnectionString")] string topicMessage,
            ILogger log,
            [Blob("reviewentity", FileAccess.Read, Connection = "storageConnectionString")] CloudBlobContainer blobContainer
            )
        {
            DecoratedReviewerMessage decoratedMessage = JsonConvert.DeserializeObject<DecoratedReviewerMessage>(topicMessage);
            CloudBlockBlob blob = blobContainer.GetBlockBlobReference($"{decoratedMessage.MessageProperties.RequestCorrelationId}.json");

            EntitiesBatchResult entityResult;

            // Dispose the client as soon as the request has completed.
            using (ITextAnalyticsClient client = new TextAnalyticsClient(new ApiKeyServiceClientCredentials())
            {
                Endpoint = Environment.GetEnvironmentVariable("textAnalyticsEndpoint")
            })
            {
                log.LogInformation($"[Request Correlation ID: {decoratedMessage.MessageProperties.RequestCorrelationId}] :: Beginning entity extraction");

                entityResult = client.EntitiesAsync(
                    false,
                    new MultiLanguageBatchInput(
                        new List<MultiLanguageInput>()
                        {
                            new MultiLanguageInput("en", "0", decoratedMessage.verbatim)
                        })).Result;
            }

            string entitiesJson = JsonConvert.SerializeObject(entityResult.Documents[0].Entities);

            try {
                // The function is synchronous, so wait for the upload here. Without this the
                // task was fire-and-forget: failures never reached the catch block and
                // "Completed" was logged before the upload had finished.
                blob.UploadTextAsync(entitiesJson).GetAwaiter().GetResult();
                log.LogInformation($"[Request Correlation ID: {decoratedMessage.MessageProperties.RequestCorrelationId}] :: Completed entity extraction :: TBC items extracted");
            } catch (Exception ex) {
                // Log at error level and attach the exception so the failure is visible.
                log.LogError(ex, $"[Request Correlation ID: {decoratedMessage.MessageProperties.RequestCorrelationId}] :: Incomplete entity extraction :: {ex.Message}");
            }
        }
Exemple #5
0
        /// <summary>
        /// Detects the language of <paramref name="text"/> and then runs entity, key-phrase and
        /// sentiment analysis concurrently.
        /// </summary>
        /// <param name="text">Text to analyze.</param>
        /// <param name="languageHint">Hint passed to every service call. When null or whitespace,
        /// the ISO 639-1 name of the first detected language is used for the three analysis calls
        /// (or an empty string if nothing was detected).</param>
        /// <returns>The language detection result together with the three analysis results.</returns>
        public async Task<TextAnalyticsAnalyzeResponse> Analyze(string text, string languageHint)
        {
            var credentials = new ApiKeyServiceClientCredentials(_subscriptionKey);

            // The client is created per call, so it must also be disposed per call.
            using (var client = new TextAnalyticsClient(credentials) { Endpoint = _endpoint })
            {
                var detectedLanguageResult = await client.DetectLanguageAsync(text, languageHint, true);

                if (string.IsNullOrWhiteSpace(languageHint))
                {
                    languageHint = detectedLanguageResult.DetectedLanguages.FirstOrDefault()?.Iso6391Name ?? "";
                }

                // Start all three requests before awaiting so they run concurrently.
                var entitiesTask   = client.EntitiesAsync(text, languageHint, true);
                var keyPhrasesTask = client.KeyPhrasesAsync(text, languageHint, true);
                var sentimentTask  = client.SentimentAsync(text, languageHint, true);

                await Task.WhenAll(entitiesTask, keyPhrasesTask, sentimentTask);

                // All tasks have completed; awaiting them only unwraps the results.
                return new TextAnalyticsAnalyzeResponse
                {
                    DetectedLanguage = detectedLanguageResult,
                    KeyPhrases = await keyPhrasesTask,
                    Sentiment = await sentimentTask,
                    Entities = await entitiesTask
                };
            }
        }
Exemple #6
0
        /// <summary>
        /// Sample: recognizes entities in one English and one Spanish document and prints each
        /// entity with its type, sub-type and match details to the console.
        /// </summary>
        /// <param name="endpoint">Text Analytics endpoint URL.</param>
        /// <param name="key">Subscription key used to build the client credentials.</param>
        public static async Task RunAsync(string endpoint, string key)
        {
            var credentials = new ApiKeyServiceClientCredentials(key);

            // The documents to be submitted for entity recognition. The ID can be any value.
            var inputDocuments = new MultiLanguageBatchInput(
                new List<MultiLanguageInput>
                {
                    new MultiLanguageInput("en", "1", "Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975, to develop and sell BASIC interpreters for the Altair 8800."),
                    new MultiLanguageInput("es", "2", "La sede principal de Microsoft se encuentra en la ciudad de Redmond, a 21 kilómetros de Seattle.")
                });

            // Dispose the client when the sample finishes instead of leaking it.
            using (var client = new TextAnalyticsClient(credentials) { Endpoint = endpoint })
            {
                var entitiesResult = await client.EntitiesAsync(false, inputDocuments);

                // Printing recognized entities
                foreach (var document in entitiesResult.Documents)
                {
                    Console.WriteLine($"Document ID: {document.Id} ");

                    Console.WriteLine("\t Entities:");

                    foreach (var entity in document.Entities)
                    {
                        Console.WriteLine($"\t\tName: {entity.Name},\tType: {entity.Type ?? "N/A"},\tSub-Type: {entity.SubType ?? "N/A"}");
                        foreach (var match in entity.Matches)
                        {
                            Console.WriteLine($"\t\t\tOffset: {match.Offset},\tLength: {match.Length},\tScore: {match.EntityTypeScore:F3}");
                        }
                    }
                }
            }
        }
        /// <summary>
        /// Submit the text of the input files to the Azure service, and
        /// output the results to tab-separated files.
        /// </summary>
        /// <param name="client">Text Analytics client used to issue the request.</param>
        /// <param name="files">Files whose full text is submitted; each file becomes one document
        /// whose ID is the file name without its extension.</param>
        /// <param name="itemID">Identifier passed through to <c>WriteOutput</c> together with the
        /// service response.</param>
        /// <returns>A task that completes once the response has been handed to <c>WriteOutput</c>.</returns>
        public static async Task RecognizeEntities(TextAnalyticsClient client, FileInfo[] files, string itemID)
        {
            // Read the text of the input files and build the input list to be submitted for entity recognition
            List<MultiLanguageInput> inputs = new List<MultiLanguageInput>();

            foreach (FileInfo file in files)
            {
                string fileText = File.ReadAllText(file.FullName);

                // The ID can be any value; we use the filename (which is the BHL Page ID).
                // Path.GetFileNameWithoutExtension strips only the trailing extension and copes
                // with extension-less files, where Replace(file.Extension, "") threw because the
                // search string was empty.
                string documentId = Path.GetFileNameWithoutExtension(file.Name);
                inputs.Add(new MultiLanguageInput(null, documentId, fileText));
            }

            // Call the Azure service to analyze the text
            var inputDocuments = new MultiLanguageBatchInput(inputs);
            var entitiesResult = await client.EntitiesAsync(false, inputDocuments);

            // Output the recognized entities from the Azure response
            WriteOutput(entitiesResult, itemID);
        }
        /// <summary>
        /// Sample based on the Text Analytics C# quickstart
        /// (https://docs.microsoft.com/en-us/azure/cognitive-services/Text-Analytics/quickstarts/csharp):
        /// recognizes entities in a fixed English sentence, prints them to the console and
        /// returns their names.
        /// </summary>
        /// <returns>The names of all recognized entities, in the order the service returned them.</returns>
        /// <remarks>Blocks on the asynchronous SDK call via <c>.Result</c>.</remarks>
        public static List<string> analyseEntities()
        {
            List<string> entities = new List<string>();

            // Create a client and make sure it is disposed when the method returns.
            // Replace 'westeurope' with the correct region for your Text Analytics subscription.
            using (ITextAnalyticsClient client = new TextAnalyticsClient(new ApiKeyServiceClientCredentials())
            {
                Endpoint = "https://westeurope.api.cognitive.microsoft.com"
            })
            {
                Console.OutputEncoding = System.Text.Encoding.UTF8;

                // Identify entities
                Console.WriteLine("\n\n===== ENTITIES ======");

                EntitiesBatchResult result4 = client.EntitiesAsync(
                    new MultiLanguageBatchInput(
                        new List<MultiLanguageInput>()
                        {
                            new MultiLanguageInput("en", "0", "The Great Depression began in 1929. By 1933, the GDP in America fell by 25%.")
                        })).Result;

                // Printing entities results
                foreach (var document in result4.Documents)
                {
                    Console.WriteLine("Document ID: {0} ", document.Id);

                    Console.WriteLine("\t Entities:");

                    foreach (EntityRecord entity in document.Entities)
                    {
                        Console.WriteLine("\t\t" + entity.Name);
                        entities.Add(entity.Name);
                    }
                }
            }

            return entities;
        }
        /// <summary>
        /// Sample: runs language detection, key-phrase extraction, sentiment analysis and entity
        /// extraction over fixed example documents and prints every result to the console.
        /// </summary>
        /// <param name="endpoint">Text Analytics endpoint URL.</param>
        /// <param name="key">Subscription key used to build the client credentials.</param>
        public static async Task RunSampleAsync(string endpoint, string key)
        {
            // Create a client; the using block disposes it when the sample has finished.
            using (ITextAnalyticsClient client = new TextAnalyticsClient(new ApiKeyServiceClientCredentials(key))
            {
                Endpoint = endpoint
            })
            {
                // The samples contain non-ASCII text, so print as UTF-8.
                Console.OutputEncoding = System.Text.Encoding.UTF8;

                // Extracting language
                Console.WriteLine("===== LANGUAGE EXTRACTION ======");

                LanguageBatchResult result = await client.DetectLanguageAsync(
                    new BatchInput(
                        new List<Input>()
                        {
                            new Input("1", "This is a document written in English."),
                            new Input("2", "Este es un document escrito en Español."),
                            new Input("3", "这是一个用中文写的文件")
                        }));

                // Printing language results.
                foreach (var document in result.Documents)
                {
                    Console.WriteLine("Document ID: {0} , Language: {1}", document.Id, document.DetectedLanguages[0].Name);
                }

                // Getting key-phrases
                Console.WriteLine("\n\n===== KEY-PHRASE EXTRACTION ======");

                KeyPhraseBatchResult result2 = await client.KeyPhrasesAsync(
                    new MultiLanguageBatchInput(
                        new List<MultiLanguageInput>()
                        {
                            new MultiLanguageInput("ja", "1", "猫は幸せ"),
                            new MultiLanguageInput("de", "2", "Fahrt nach Stuttgart und dann zum Hotel zu Fu."),
                            new MultiLanguageInput("en", "3", "My cat is stiff as a rock."),
                            new MultiLanguageInput("es", "4", "A mi me encanta el fútbol!")
                        }));

                // Printing keyphrases
                foreach (var document in result2.Documents)
                {
                    Console.WriteLine("Document ID: {0} ", document.Id);

                    Console.WriteLine("\t Key phrases:");

                    foreach (string keyphrase in document.KeyPhrases)
                    {
                        Console.WriteLine("\t\t" + keyphrase);
                    }
                }

                // Extracting sentiment
                Console.WriteLine("\n\n===== SENTIMENT ANALYSIS ======");

                SentimentBatchResult result3 = await client.SentimentAsync(
                    new MultiLanguageBatchInput(
                        new List<MultiLanguageInput>()
                        {
                            new MultiLanguageInput("en", "0", "I had the best day of my life."),
                            new MultiLanguageInput("en", "1", "This was a waste of my time. The speaker put me to sleep."),
                            new MultiLanguageInput("es", "2", "No tengo dinero ni nada que dar..."),
                            new MultiLanguageInput("it", "3", "L'hotel veneziano era meraviglioso. È un bellissimo pezzo di architettura."),
                        }));

                // Printing sentiment results
                foreach (var document in result3.Documents)
                {
                    Console.WriteLine("Document ID: {0} , Sentiment Score: {1:0.00}", document.Id, document.Score);
                }

                // Extracting entities
                Console.WriteLine("\n\n===== Entity Extraction ======");

                EntitiesBatchResultV2dot1 result4 = await client.EntitiesAsync(
                    new MultiLanguageBatchInput(
                        new List<MultiLanguageInput>()
                        {
                            new MultiLanguageInput("en", "0", "Microsoft released win10. Microsoft also released Hololens"),
                            new MultiLanguageInput("en", "1", "Microsoft is an IT company."),
                            new MultiLanguageInput("es", "2", "Microsoft lanzó win10. Microsoft también lanzó Hololens"),
                            new MultiLanguageInput("es", "3", "Microsoft es una empresa de TI."),
                        }));

                // Printing entity extraction results
                foreach (var document in result4.Documents)
                {
                    Console.WriteLine("Document ID: {0} ", document.Id);

                    Console.WriteLine("\t Entities:");

                    foreach (EntityRecordV2dot1 entity in document.Entities)
                    {
                        Console.WriteLine("\t\tEntity Name: {0}", entity.Name);
                        Console.WriteLine("\t\tWikipedia Language: {0}", entity.WikipediaLanguage);
                        Console.WriteLine("\t\tWikipedia Url: {0}", entity.WikipediaUrl);
                        Console.WriteLine("\t\tNumber of times appeared on the text: {0}", entity.Matches.Count);
                        Console.WriteLine("\t\tEntity Type: {0}", entity.Type);
                        Console.WriteLine("\t\tEntity SubType: {0}", entity.SubType);
                        Console.WriteLine("\n");
                    }
                }
            }
        }
Exemple #10
0
        /// <summary>
        /// Console entry point: runs the language-detection, key-phrase, sentiment and entity
        /// demos one after another against fixed sample documents, prints each result, then
        /// waits for a line of input before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Replace the region in the endpoint with the one for your Text Analytics subscription.
            ITextAnalyticsClient client = new TextAnalyticsClient(new ApiKeyServiceClientCredentials())
            {
                Endpoint = "https://centralindia.api.cognitive.microsoft.com"
            };

            Console.OutputEncoding = System.Text.Encoding.UTF8;

            // --- Language detection -------------------------------------------------------
            Console.WriteLine("===== LANGUAGE EXTRACTION ======");

            var languageDocs = new List<Input>
            {
                new Input("1", "This is a document written in English."),
                new Input("2", "Este es un document escrito en Español."),
                new Input("3", "这是一个用中文写的文件")
            };
            var languages = client.DetectLanguageAsync(new BatchInput(languageDocs)).Result;

            foreach (var doc in languages.Documents)
            {
                Console.WriteLine("Document ID: {0} , Language: {1}", doc.Id, doc.DetectedLanguages[0].Name);
            }

            // --- Key phrases --------------------------------------------------------------
            Console.WriteLine("\n\n===== KEY-PHRASE EXTRACTION ======");

            var keyPhraseDocs = new List<MultiLanguageInput>
            {
                new MultiLanguageInput("ja", "1", "猫は幸せ"),
                new MultiLanguageInput("de", "2", "Fahrt nach Stuttgart und dann zum Hotel zu Fu."),
                new MultiLanguageInput("en", "3", "My cat is stiff as a rock."),
                new MultiLanguageInput("es", "4", "A mi me encanta el fútbol!")
            };
            KeyPhraseBatchResult keyPhrases = client.KeyPhrasesAsync(new MultiLanguageBatchInput(keyPhraseDocs)).Result;

            foreach (var doc in keyPhrases.Documents)
            {
                Console.WriteLine("Document ID: {0} ", doc.Id);
                Console.WriteLine("\t Key phrases:");

                foreach (string phrase in doc.KeyPhrases)
                {
                    Console.WriteLine("\t\t" + phrase);
                }
            }

            // --- Sentiment ----------------------------------------------------------------
            Console.WriteLine("\n\n===== SENTIMENT ANALYSIS ======");

            var sentimentDocs = new List<MultiLanguageInput>
            {
                new MultiLanguageInput("en", "0", "I had the best day of my life."),
                new MultiLanguageInput("en", "1", "This was a waste of my time. The speaker put me to sleep."),
                new MultiLanguageInput("es", "2", "No tengo dinero ni nada que dar..."),
                new MultiLanguageInput("it", "3", "L'hotel veneziano era meraviglioso. È un bellissimo pezzo di architettura."),
            };
            SentimentBatchResult sentiments = client.SentimentAsync(new MultiLanguageBatchInput(sentimentDocs)).Result;

            foreach (var doc in sentiments.Documents)
            {
                Console.WriteLine("Document ID: {0} , Sentiment Score: {1:0.00}", doc.Id, doc.Score);
            }

            // --- Entities -----------------------------------------------------------------
            Console.WriteLine("\n\n===== ENTITIES ======");

            var entityDocs = new List<MultiLanguageInput>
            {
                new MultiLanguageInput("en", "0", "The Great Depression began in 1929. By 1933, the GDP in America fell by 25%.")
            };
            EntitiesBatchResultV2dot1 entityBatch = client.EntitiesAsync(new MultiLanguageBatchInput(entityDocs)).Result;

            foreach (var doc in entityBatch.Documents)
            {
                Console.WriteLine("Document ID: {0} ", doc.Id);
                Console.WriteLine("\t Entities:");

                foreach (EntityRecordV2dot1 record in doc.Entities)
                {
                    Console.WriteLine("\t\t{0}\t\t{1}\t\t{2}\t\t{3}", record.Name, record.WikipediaUrl, record.Type, record.SubType);
                }
            }

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }
        /// <summary>
        /// Enriches a team event: entities and key phrases found in its description are looked up
        /// on the wiki search endpoint, matching terms in the description are replaced with links,
        /// and any page images found are collected.
        /// </summary>
        /// <param name="teamEvent">Event to enrich. <c>Description</c> is rewritten with links and
        /// <c>Photos</c> is set to the JSON-serialized list of found images.</param>
        /// <returns>A task that completes when the event has been updated.</returns>
        public async Task BuildTextWithLinksAsync(TeamEvent teamEvent)
        {
            var input = teamEvent.Description;
            var entitiesResponse = await _client.EntitiesAsync(input, "en");

            var keyPhrasesResponse = await _client.KeyPhrasesAsync(input, "en");

            // Candidates are stored as "<text>_<type>"; the sorted set removes duplicates and
            // fixes the lookup order.
            var words = new SortedSet<string>();

            foreach (var entity in entitiesResponse.Entities)
            {
                // NOTE(review): Type is treated as nullable here (other callers of this SDK guard
                // it with ??); an empty type is used instead of throwing.
                var entityType = (entity.Type ?? string.Empty).Replace("DateTime", "Date");
                words.Add($"{entity.Name}_{entityType}");
            }

            foreach (var keyPhrase in keyPhrasesResponse.KeyPhrases)
            {
                words.Add($"{keyPhrase}_Phrase");
            }

            var photos = new HashSet<dynamic>();

            // One HttpClient for the whole method instead of a new one per candidate.
            using var httpClient = new HttpClient();

            foreach (var word in words)
            {
                // Split on the LAST underscore so that names containing '_' stay intact.
                var separatorIndex = word.LastIndexOf('_');
                var text = word.Substring(0, separatorIndex);
                var type = word.Substring(separatorIndex + 1);

                if (string.IsNullOrWhiteSpace(text))
                {
                    continue;
                }

                var encodedEntity = HttpUtility.UrlEncode(text);

                var wikiResponseRaw = await httpClient.GetStringAsync(
                    $"{WikiSearchEndpoint}?action=opensearch&" +
                    $"search={encodedEntity}&" +
                    "limit=1&" +
                    "namespace=0&" +
                    "format=json");

                if (string.IsNullOrEmpty(wikiResponseRaw))
                {
                    continue;
                }

                // The code expects a 4-element array whose last element lists the result links.
                var wikiResponse = (JArray)JsonConvert.DeserializeObject(wikiResponseRaw);
                if (wikiResponse.Count != 4)
                {
                    continue;
                }

                var links = (JArray)wikiResponse[3];
                if (links.Count != 1)
                {
                    continue;
                }

                var link = links[0].Value<string>();
                input = ReplaceWithLink(input, " ", text, link);
                input = ReplaceWithLink(input, ", ", text, link);
                input = ReplaceWithLink(input, ". ", text, link);

                teamEvent.Description = input;

                // The title is URL-encoded like the search term above; raw text containing
                // '&', '#' or '+' would otherwise corrupt the query string.
                var wikiImageResponse = await httpClient.GetStringAsync(
                    $"{WikiSearchEndpoint}?action=query&" +
                    "prop=pageimages&" +
                    "formatversion=2&" +
                    "format=json&" +
                    "piprop=original&" +
                    $"titles={encodedEntity}");

                var imageUrl = ((JObject)JsonConvert.DeserializeObject(wikiImageResponse))
                               .SelectToken("$.query.pages[0].original.source")?.Value<string>();

                if (!string.IsNullOrEmpty(imageUrl))
                {
                    photos.Add(new { imageUrl = imageUrl, title = text, link = link, type = type });
                }
            }

            teamEvent.Photos = JsonConvert.SerializeObject(photos);
        }
Exemple #12
0
        /// <summary>
        /// HTTP-triggered function that analyzes the <c>text</c> property of the JSON request body:
        /// detects its language, scores its sentiment, extracts entities and key phrases, and
        /// builds a short extractive summary from the sentences that mention the key phrases.
        /// </summary>
        /// <param name="req">Incoming request; the body is expected to be JSON with a <c>text</c> property.</param>
        /// <param name="log">Function logger; the partial result is logged after each step.</param>
        /// <returns>
        /// 200 with the result JSON (language, sentimentScore, entities, keyphrases, summary) as a
        /// string, or 400 with an error JSON when no text was supplied.
        /// </returns>
        public static async Task<IActionResult> Run(
            [HttpTrigger(AuthorizationLevel.Function, "get", "post", Route = null)] HttpRequest req,
            ILogger log)
        {
            string cognitive_service_key      = Environment.GetEnvironmentVariable("cognitive_service_key");
            string cognitive_service_endpoint = Environment.GetEnvironmentVariable("cognitive_service_endpoint");

            const int SentencesToSummarize = 3;

            string  requestBody = await new StreamReader(req.Body).ReadToEndAsync();
            dynamic data        = JsonConvert.DeserializeObject(requestBody);
            string  inputText   = data?.text;

            // Validate before any use: the check used to sit at the very end, after inputText had
            // already been dereferenced, so it could never produce the 400 response.
            if (string.IsNullOrWhiteSpace(inputText))
            {
                return new BadRequestObjectResult("{ \"error\": \"Please pass the text input for the text analytics operations\" }");
            }

            dynamic result = new JObject();

            var credentials = new ApiKeyServiceClientCredentials(cognitive_service_key);

            // The client is created per request, so dispose it per request.
            using (var client = new TextAnalyticsClient(credentials) { Endpoint = cognitive_service_endpoint })
            {
                // Detecting language first
                var inputDocuments = new LanguageBatchInput(
                    new List<LanguageInput>
                    {
                        new LanguageInput(id: "1", text: inputText)
                    });

                var langResults = await client.DetectLanguageAsync(false, inputDocuments);

                string inputLanguage = null;

                foreach (var document in langResults.Documents)
                {
                    // Tolerate a document without any detected language instead of throwing.
                    inputLanguage = document.DetectedLanguages?.FirstOrDefault()?.Iso6391Name;
                }

                result.language = inputLanguage;
                log.LogInformation($"{result.ToString()}");

                // Detecting sentiment of the input text
                var inputDocuments2 = new MultiLanguageBatchInput(
                    new List<MultiLanguageInput>
                    {
                        new MultiLanguageInput(inputLanguage, "1", inputText)
                    });

                var sentimentResult = await client.SentimentAsync(false, inputDocuments2);

                double? sentimentScore = 0;

                foreach (var document in sentimentResult.Documents)
                {
                    sentimentScore = document.Score;
                }

                result.sentimentScore = sentimentScore;
                log.LogInformation($"{result.ToString()}");

                // Detecting entities in the text
                var entitiesResult = await client.EntitiesAsync(false, inputDocuments2);

                JArray entities = new JArray();

                foreach (var document in entitiesResult.Documents)
                {
                    foreach (var entity in document.Entities)
                    {
                        // One JObject per entity. Reusing a single object per document made
                        // later entities overwrite the first array entry and carried stale
                        // match data from one entity to the next.
                        dynamic entityObject = new JObject();
                        entityObject.name    = entity.Name;
                        entityObject.type    = entity.Type;
                        entityObject.subtype = entity.SubType;

                        // Only one offset/length/score is stored, so the last match wins.
                        foreach (var match in entity.Matches)
                        {
                            entityObject.offset = match.Offset;
                            entityObject.length = match.Length;
                            entityObject.score  = match.EntityTypeScore;
                        }
                        entities.Add(entityObject);
                    }
                }
                result.entities = entities;
                log.LogInformation($"{result.ToString()}");

                // Detecting keyphrases
                var kpResults = await client.KeyPhrasesAsync(false, inputDocuments2);

                JArray keyPhrases = new JArray();
                var    phrases    = new List<string>();

                foreach (var document in kpResults.Documents)
                {
                    foreach (string keyphrase in document.KeyPhrases)
                    {
                        keyPhrases.Add(keyphrase);
                        phrases.Add(keyphrase);
                    }
                }
                result.keyphrases = keyPhrases;

                // Generating text summary from the first 10 key phrases
                string summary = SummarizeByKeyPhrases(inputText, phrases.Take(10).ToList(), SentencesToSummarize);
                result.summary = summary;
                log.LogInformation($"{result.ToString()}");
            }

            return new OkObjectResult($"{result.ToString()}");
        }

        /// <summary>
        /// Builds an extractive summary: keeps the sentences that mention the most key phrases,
        /// in their original order, and falls back to the leading sentences when none match.
        /// </summary>
        /// <param name="inputText">Text to summarize; split into sentences on '!', '.' and '?'.</param>
        /// <param name="topPhrases">Key phrases to look for in each sentence (case-insensitive).</param>
        /// <param name="maxSentences">Maximum number of sentences in the summary.</param>
        /// <returns>The selected sentences, each followed by a period.</returns>
        private static string SummarizeByKeyPhrases(string inputText, List<string> topPhrases, int maxSentences)
        {
            string[] sentences = inputText.Split('!', '.', '?');

            List<Match> matchList = new List<Match>();

            for (int index = 0; index < sentences.Length; index++)
            {
                string sentence = sentences[index];

                // Only sentences of a reasonable size are candidates (same thresholds as before,
                // checked once per sentence instead of once per phrase).
                if (sentence.Length <= 20 || WordCount(sentence) < 3)
                {
                    continue;
                }

                double count = 0;
                foreach (var phrase in topPhrases)
                {
                    // Case-insensitive search: lower-casing only the sentence never matched
                    // key phrases that contain capital letters.
                    if (sentence.IndexOf(phrase, StringComparison.OrdinalIgnoreCase) > -1)
                    {
                        count++;
                    }
                }

                if (count > 0)
                {
                    matchList.Add(new Match {
                        sentence = index, total = count
                    });
                }
            }

            // Best-scoring sentences first, then restored to document order.
            var bestMatches = matchList.OrderByDescending(y => y.total).Take(maxSentences).OrderBy(x => x.sentence).ToList();
            StringBuilder summary = new StringBuilder();

            foreach (var best in bestMatches)
            {
                summary.Append(sentences[best.sentence] + ".");
            }

            // If there are no sentences found, just take the first ones. The fallback used to
            // index the (empty) match list and therefore always threw.
            if (bestMatches.Count == 0)
            {
                for (int i = 0; i < Math.Min(maxSentences, sentences.Length); i++)
                {
                    summary.Append(sentences[i] + ".");
                }
            }

            return summary.ToString();
        }
        public static List <SentimentResults> fullAnalysis(List <DB_Service.CrimeTweets> crimeTweets)
        {//-------------------------------------------------------------------------------------------------------------------------------------
            // Create a client.
            ITextAnalyticsClient client = new TextAnalyticsClient(new ApiKeyServiceClientCredentials())
            {
                Endpoint = "https://westeurope.api.cognitive.microsoft.com"
                           //https://westeurope.api.cognitive.microsoft.com/text/analytics/v2.0
            };

            //-------------------------------------------------------------------------------------------------------------------------------------
            // Extracting language

            List <Input> myInp = new List <Input>();

            foreach (DB_Service.CrimeTweets ct in crimeTweets)
            {
                Input inp = new Input(ct.tweet_id.ToString(), ct.message);
                myInp.Add(inp);
            }

            var result = client.DetectLanguageAsync(new BatchInput(myInp)).Result;

            List <SentimentResults> tweetLangs = new List <SentimentResults>();

            // Printing language results.
            foreach (var document in result.Documents)
            {
                SentimentResults sr = new SentimentResults();
                sr.setTweet_id(Int32.Parse(document.Id));
                sr.setLanguage_short(document.DetectedLanguages[0].Iso6391Name);
                sr.setLanguage(document.DetectedLanguages[0].Name);
                tweetLangs.Add(sr);
            }
            //-------------------------------------------------------------------------------------------------------------------------------------
            // Getting key-phrases

            List <MultiLanguageInput> keyPhrases = new List <MultiLanguageInput>();//Key phrases
            int count = 0;

            foreach (DB_Service.CrimeTweets ct in crimeTweets)
            {
                string             tempLang = tweetLangs.ElementAt <SentimentResults>(count).getLanguage_short();
                MultiLanguageInput inp      = new MultiLanguageInput(tempLang, ct.tweet_id.ToString(), ct.message);
                keyPhrases.Add(inp);
                count++;
            }

            KeyPhraseBatchResult result2 = client.KeyPhrasesAsync(new MultiLanguageBatchInput(keyPhrases)).Result;

            // Printing keyphrases
            List <string>           phrases         = new List <string>();
            List <SentimentResults> tweetKeyPhrases = new List <SentimentResults>();

            count = 0;
            foreach (var document in result2.Documents)
            {
                foreach (string keyphrase in document.KeyPhrases)
                {
                    phrases.Add(keyphrase);
                }
                SentimentResults sr = new SentimentResults();
                sr = tweetLangs.ElementAt <SentimentResults>(count);
                sr.setKeyPhrases(phrases);
                tweetKeyPhrases.Add(sr);
                count++;
            }
            //-------------------------------------------------------------------------------------------------------------------------------------
            // Getting Sentiment Analysis

            List <MultiLanguageInput> sentiAni = new List <MultiLanguageInput>();//Sentiment Analysis

            count = 0;
            foreach (DB_Service.CrimeTweets ct in crimeTweets)
            {
                string             tempLang = tweetKeyPhrases.ElementAt <SentimentResults>(count).getLanguage_short();
                MultiLanguageInput inp      = new MultiLanguageInput(tempLang, ct.tweet_id.ToString(), ct.message);
                sentiAni.Add(inp);
                count++;
            }

            SentimentBatchResult result3 = client.SentimentAsync(new MultiLanguageBatchInput(sentiAni)).Result;

            // Printing sentiment results
            List <SentimentResults> tweetSentiments = new List <SentimentResults>();

            count = 0;

            foreach (var document in result3.Documents)
            {
                SentimentResults sr = new SentimentResults();
                sr = tweetKeyPhrases.ElementAt <SentimentResults>(count);
                sr.setSenti_score((double)document.Score);
                tweetSentiments.Add(sr);
                count++;
            }

            //-------------------------------------------------------------------------------------------------------------------------------------
            // Getting Entities

            //Continue using the same list so languages wont change
            EntitiesBatchResult result4 = client.EntitiesAsync(new MultiLanguageBatchInput(sentiAni)).Result;

            // Printing entities results
            List <string>           entitiySet    = new List <string>();
            List <SentimentResults> tweetEntities = new List <SentimentResults>();

            count = 0;

            foreach (var document in result4.Documents)
            {
                foreach (EntityRecord entitiy in document.Entities)
                {
                    entitiySet.Add(entitiy.Name);
                }
                SentimentResults sr = new SentimentResults();
                sr = tweetSentiments.ElementAt <SentimentResults>(count);
                sr.setEntities(entitiySet);
                tweetEntities.Add(sr);
                count++;
            }

            //-------------------------------------------------------------------------------------------------------------------------------------
            //Add Data to Database Service

            List <DB_Service.Sentiments> completeSentiments = new List <DB_Service.Sentiments>();
            List <DB_Service.Entities>   completeEntities   = new List <DB_Service.Entities>();

            foreach (SentimentResults finalResults in tweetEntities)
            {
                //Start building Sentiment class
                DB_Service.Sentiments newSenti = new DB_Service.Sentiments();
                newSenti.tweet_id         = finalResults.getTweet_id();
                newSenti.sentiment_total  = finalResults.getSenti_score();
                newSenti.category_primary = finalResults.getLanguage() + ", " + finalResults.getLanguage_short();

                List <string> entList     = finalResults.getEntities();
                List <string> phraseList  = finalResults.getKeyPhrases();
                StringBuilder wholePhrase = new StringBuilder("");
                count = 0;
                //Start building Entity Class
                if (entList != null && entList.Count > 0)
                {
                    foreach (string entity in entList)
                    {
                        wholePhrase.Append(entity + ",");
                        //DB_Service.Entities newEntity = new DB_Service.Entities();
                        //newEntity.name = entity;
                        //newEntity.sentiment_id = -1//this is a programming design problem
                    }
                }

                if (phraseList != null && phraseList.Count > 0)
                {
                    foreach (string word in phraseList)
                    {
                        count++;
                        if (phraseList.Count > count)
                        {
                            wholePhrase.Append(word + ",");
                        }
                        else
                        {
                            wholePhrase.Append(word);
                        }
                    }
                }

                newSenti.key_phrases = wholePhrase.ToString();

                //List<string> EntList = finalResults.getEntities();
                //if(EntList != null && EntList.Count > 0)
                //{
                //    //newSenti.category_primary = EntList.ElementAt<string>(0);

                //}
                //else
                //{
                //    newSenti.category_primary = "";
                //}
                //Finish building Sentiment Class
                completeSentiments.Add(newSenti);
            }
            //Add to service now
            DB_Service.ServiceClient service = new DB_Service.ServiceClient();
            service.addSentiments(completeSentiments);

            return(tweetEntities);
        }
        /// <summary>
        /// Extracts the text of a PDF, detects its language from the first text chunk, then gathers
        /// key phrases and entities for every chunk and stores the results on <paramref name="resumeDocModel"/>.
        /// </summary>
        /// <param name="pdfReader">Reader for the PDF whose text is analysed.</param>
        /// <param name="log">Receives the start and completion trace messages.</param>
        /// <param name="resumeDocModel">Supplies the document name (used as the document id) and receives the results.</param>
        public static void TextAnalytics(PdfReader pdfReader, TraceWriter log, ResumeDocModel resumeDocModel)
        {
            // Create a client.
            ITextAnalyticsClient client = new TextAnalyticsClient(new ApiKeyServiceClientCredentialsForText())
            {
                Endpoint = TextAnalyticsEndPoint
            };

            Console.OutputEncoding = System.Text.Encoding.UTF8;

            log.Info("===== Text Analytics Started ======");
            string content = DocumentExtraction.GetTextFromPDF(pdfReader);

            // The text is processed chunk by chunk; chunking is delegated to StringExtensions.Split
            // (presumably bounded by MaxLengthofCharacters - confirm against that helper).
            List<string> splittedList = StringExtensions.Split(content, MaxLengthofCharacters).ToList();

            // Language is detected once, from the first chunk, and reused for every later request.
            string detectedLanguage = null;
            if (splittedList.Count > 0)
            {
                var languageResult = client.DetectLanguageAsync(new BatchInput(
                    new List<Input>()
                    {
                        new Input(resumeDocModel.DocumentName, splittedList[0])
                    })).Result;

                resumeDocModel.languageBatchResult = languageResult.Documents.FirstOrDefault();

                // FirstOrDefault instead of [0]: an empty language list must not throw.
                detectedLanguage = languageResult.Documents
                    .Select(doc => doc.DetectedLanguages?.FirstOrDefault()?.Iso6391Name)
                    .FirstOrDefault();
            }
            else
            {
                // Nothing to analyse: fall through so the model still receives empty result lists.
                log.Info("No text extracted from the document; language detection skipped.");
            }

            List<string> keyPhraseList = new List<string>();
            List<EntityRecordV2dot1> entityRecords = new List<EntityRecordV2dot1>();

            foreach (string splittedContent in splittedList)
            {
                // Both requests take the same single-document batch, so build it once.
                var chunkInput = new MultiLanguageBatchInput(
                    new List<MultiLanguageInput>()
                    {
                        new MultiLanguageInput(detectedLanguage, resumeDocModel.DocumentName, splittedContent)
                    });

                KeyPhraseBatchResult keyPhraseBatch = client.KeyPhrasesAsync(chunkInput).Result;
                foreach (var doc in keyPhraseBatch.Documents)
                {
                    keyPhraseList.AddRange(doc.KeyPhrases);
                }

                // Iterate rather than First(): a chunk the service rejects yields no document,
                // and that must not abort the whole run.
                EntitiesBatchResultV2dot1 entitiesBatch = client.EntitiesAsync(chunkInput).Result;
                foreach (var doc in entitiesBatch.Documents)
                {
                    entityRecords.AddRange(doc.Entities);
                }
            }

            resumeDocModel.keyPhraseBatchResult.Id         = resumeDocModel.DocumentName;
            resumeDocModel.keyPhraseBatchResult.KeyPhrases = keyPhraseList;

            resumeDocModel.entityBatchResult.Id            = resumeDocModel.DocumentName;
            resumeDocModel.entityBatchResult.EntityRecords = entityRecords;

            log.Info("===== Text Analytics Completed ======");
        }
Exemple #15
0
        /// <summary>
        /// Detects the language of <paramref name="input"/>, collects its entities and key phrases,
        /// looks each one up on the wiki search endpoint and returns the findings serialised as JSON.
        /// </summary>
        /// <param name="input">Text to analyse.</param>
        /// <param name="recursiveSearch">
        /// When true, descriptions longer than 30 characters found at the top level are analysed in turn.
        /// </param>
        /// <param name="recursionLevel">Current nesting depth; nested searches are only started from level 0.</param>
        /// <returns>
        /// JSON object with a <c>words</c> array (text/type pairs) and a <c>links</c> array
        /// (text, url, type, description, imageUrl, innerSearch).
        /// </returns>
        public async Task<string> FindLinksAndImages(string input, bool recursiveSearch, int recursionLevel = 0)
        {
            var links = new List<dynamic>();
            var words = new SortedDictionary<string, string>();

            var language    = await _client.DetectLanguageAsync(input);
            var languageIso = language.DetectedLanguages[0].Iso6391Name;

            var entitiesResponse = await _client.EntitiesAsync(input, languageIso);
            if (entitiesResponse.Entities != null)
            {
                foreach (var entity in entitiesResponse.Entities.Where(e => e.Type != "Quantity"))
                {
                    if (!words.ContainsKey(entity.Name))
                    {
                        // Invariant casing keeps the label stable regardless of the host culture.
                        words[entity.Name] = entity.Type.ToUpperInvariant();
                    }
                }
            }

            var keyPhrasesResponse = await _client.KeyPhrasesAsync(input, languageIso);
            if (keyPhrasesResponse.KeyPhrases != null)
            {
                foreach (var keyPhrase in keyPhrasesResponse.KeyPhrases)
                {
                    // Entities win over key phrases when both produce the same text.
                    if (!words.ContainsKey(keyPhrase))
                    {
                        words[keyPhrase] = "PHRASE";
                    }
                }
            }

            // One client for the whole lookup loop instead of a new one per word.
            using var httpClient = new HttpClient();

            foreach (var entry in words)
            {
                var text          = entry.Key;
                var type          = entry.Value;
                var encodedEntity = HttpUtility.UrlEncode(text);

                var wikiResponseRaw = await httpClient.GetStringAsync(
                    $"{WikiSearchEndpoint}?action=opensearch&" +
                    $"search={encodedEntity}&" +
                    "limit=1&" +
                    "namespace=0&" +
                    "format=json");

                var wikiResponse = (JArray)JsonConvert.DeserializeObject(wikiResponseRaw);
                var url          = wikiResponse.SelectToken("$[3].[0]")?.Value<string>();
                var description  = wikiResponse.SelectToken("$[2].[0]")?.Value<string>();

                if (string.IsNullOrEmpty(url))
                {
                    continue;
                }

                // The title must be URL-encoded like the search term, otherwise characters such
                // as '&' or '#' in the text corrupt the query string.
                var wikiImageResponse = await httpClient.GetStringAsync(
                    $"{WikiSearchEndpoint}?action=query&" +
                    "prop=pageimages&" +
                    "formatversion=2&" +
                    "format=json&" +
                    "piprop=original&" +
                    $"titles={encodedEntity}");

                var imageUrl = ((JObject)JsonConvert.DeserializeObject(wikiImageResponse))
                               .SelectToken("$.query.pages[0].original.source")?.Value<string>();

                imageUrl = string.IsNullOrEmpty(imageUrl) ? PlaceholderImage : imageUrl;

                object innerSearch = null;
                // description may be null when the search response carries no description entry.
                if (recursiveSearch && recursionLevel == 0 && description != null && description.Length > 30)
                {
                    var innerSearchRaw = await FindLinksAndImages(description, recursiveSearch, recursionLevel + 1);
                    innerSearch = JsonConvert.DeserializeObject(innerSearchRaw);
                }

                links.Add(new
                {
                    text        = text,
                    url         = url,
                    type        = type,
                    description = description,
                    imageUrl    = imageUrl,
                    innerSearch = innerSearch
                });
            }

            return JsonConvert.SerializeObject(new
            {
                words = words.Select(w => new { text = w.Key, type = w.Value }),
                links = links
            });
        }
Exemple #16
0
        /// <summary>
        /// Text Analytics - V2 - Key Phrases &amp; Entities
        /// </summary>
        /// <param name="keyPhrasesSamples">
        /// Documents to analyse; each pair's key is passed as the language and its value as the text.
        /// </param>
        /// <param name="cognitiveServicesApiCalls">
        /// Call counter; <c>ApiCallV2Count</c> is incremented once per service request.
        /// </param>
        /// <returns>Key-phrase and entity results merged across all batches.</returns>
        public static Tuple<KeyPhraseBatchResult, EntitiesBatchResult> TextAnalyticsKeyPhrasesAndEntities(List<KeyValuePair<string, string>> keyPhrasesSamples, ref CognitiveServicesApiCalls cognitiveServicesApiCalls)
        {
            // Inputs are sent to the service in batches of this many documents.
            const int batchSize = 100;

            var creds = new ApiKeyServiceClientCredentials();

            // Build client API call
            ITextAnalyticsClient client = new TextAnalyticsClient(creds)
            {
                Endpoint = Config.COGNITIVE_SERVICES_REGION_URI
            };

            var lengthofText = keyPhrasesSamples.Sum(sample => sample.Value.Length);
            Console.WriteLine(string.Format("\tCharacters: {0}", lengthofText));

            // The position of each sample in the list becomes its document id.
            var multiLanguageInputs = keyPhrasesSamples
                .Select((sample, index) => new MultiLanguageInput(sample.Key, index.ToString(), sample.Value))
                .ToList();

            // Ceiling division: an exact multiple of batchSize must not report a phantom extra batch.
            int batches = (multiLanguageInputs.Count + batchSize - 1) / batchSize;

            var keyPhraseBatchResults = new List<KeyPhraseBatchResult>();
            var entityBatchResults    = new List<EntitiesBatchResult>();

            for (int i = 0; i < batches; i++)
            {
                var batchInputs = multiLanguageInputs.Skip(i * batchSize).Take(batchSize).ToList();
                var batch       = new MultiLanguageBatchInput(batchInputs);

                Console.WriteLine(string.Format("\tProcessing Batch {0} of {1}", (i + 1), batches));

                // key phrases result
                keyPhraseBatchResults.Add(client.KeyPhrasesAsync(true, batch).Result);
                cognitiveServicesApiCalls.ApiCallV2Count++;

                // entities result
                entityBatchResults.Add(client.EntitiesAsync(true, batch).Result);
                cognitiveServicesApiCalls.ApiCallV2Count++;
            }

            // Flatten the per-batch results into a single result of each kind.
            var keyPhraseDocuments   = keyPhraseBatchResults.SelectMany(r => r.Documents).ToList();
            var keyPhraseErrors      = keyPhraseBatchResults.SelectMany(r => r.Errors).ToList();
            var keyPhraseBatchResult = new KeyPhraseBatchResult(keyPhraseDocuments, keyPhraseErrors);

            var entitiesDocuments   = entityBatchResults.SelectMany(r => r.Documents).ToList();
            var entitiesErrors      = entityBatchResults.SelectMany(r => r.Errors).ToList();
            var entitiesBatchResult = new EntitiesBatchResult(entitiesDocuments, entitiesErrors);

            return new Tuple<KeyPhraseBatchResult, EntitiesBatchResult>(keyPhraseBatchResult, entitiesBatchResult);
        }