/// <summary>
/// Runs entity recognition over a batch of sentences and groups the detected
/// entity names by the document id assigned to each sentence.
/// </summary>
/// <param name="lstSentence">Sentences to analyze; each sentence's list index becomes its document id.</param>
/// <returns>Map from document id to the (documentId, entityName) pairs found in that sentence.</returns>
public static Dictionary<string, List<KeyValuePair<string, string>>> IdentifyEntities(List<string> lstSentence)
{
    var response = new Dictionary<string, List<KeyValuePair<string, string>>>();

    // Create a client. Replace 'westus' with the correct region for your
    // Text Analytics subscription.
    ITextAnalyticsClient client = new TextAnalyticsClient(new ApiKeyServiceClientCredentials())
    {
        Endpoint = "https://westus.api.cognitive.microsoft.com"
    };

    // One input per sentence; ids are "0", "1", ... and the language is fixed to English.
    var inputList = lstSentence
        .Select((sentence, index) => new MultiLanguageInput("en", index.ToString(), sentence))
        .ToList();

    // NOTE(review): blocking on .Result can deadlock under a synchronization
    // context; consider making this method async end-to-end.
    EntitiesBatchResult result = client.EntitiesAsync(new MultiLanguageBatchInput(inputList)).Result;

    // Collect the recognized entities per returned document.
    foreach (var document in result.Documents)
    {
        var pairs = new List<KeyValuePair<string, string>>();
        foreach (EntityRecord entity in document.Entities)
        {
            pairs.Add(new KeyValuePair<string, string>(document.Id, entity.Name));
        }
        response.Add(document.Id, pairs);
    }

    return response;
}
/// <summary>
/// Verifies EntitiesAsync against a recorded HTTP session: a single English
/// document should yield the "Microsoft" entity with a known Bing id and
/// match scores.
/// </summary>
public async Task Entities()
{
    using (MockContext context = MockContext.Start(this.GetType().FullName))
    {
        // Replays the "Entities" recorded session instead of hitting the live service.
        HttpMockServer.Initialize(this.GetType().FullName, "Entities");
        ITextAnalyticsClient client = GetClient(HttpMockServer.CreateInstance());
        EntitiesBatchResult result = await client.EntitiesAsync(
            null,
            new MultiLanguageBatchInput(
                new List <MultiLanguageInput>()
                {
                    new MultiLanguageInput()
                    {
                        Id = "id",
                        Text = "Microsoft released Windows 10",
                        Language = "en"
                    }
                }));
        // Expected values come from the recorded response payload.
        Assert.Equal("Microsoft", result.Documents[0].Entities[0].Name);
        Assert.Equal("a093e9b9-90f5-a3d5-c4b8-5855e1b01f85", result.Documents[0].Entities[0].BingId);
        Assert.Equal("Microsoft", result.Documents[0].Entities[0].Matches[0].Text);
        Assert.Equal(0.12508682244047509, result.Documents[0].Entities[0].Matches[0].WikipediaScore);
        Assert.Equal(0.99999618530273438, result.Documents[0].Entities[0].Matches[0].EntityTypeScore);
        context.Stop();
    }
}
/// <summary>
/// Service Bus triggered function: extracts entities from a review message
/// and persists them as JSON to a blob named after the request correlation id.
/// </summary>
/// <param name="topicMessage">JSON-serialized DecoratedReviewerMessage from the "newreview"/"entity" subscription.</param>
/// <param name="log">Function logger.</param>
/// <param name="blobContainer">Container that receives the "{correlationId}.json" blob.</param>
public static void Run(
    [ServiceBusTrigger("newreview", "entity", Connection = "topicConnectionString")] string topicMessage,
    ILogger log,
    // NOTE(review): this binding is declared FileAccess.Read but the blob is
    // written below — confirm whether it should be FileAccess.Write.
    [Blob("reviewentity", FileAccess.Read, Connection = "storageConnectionString")] CloudBlobContainer blobContainer
    )
{
    DecoratedReviewerMessage decoratedMessage = JsonConvert.DeserializeObject <DecoratedReviewerMessage>(topicMessage);
    CloudBlockBlob blob = blobContainer.GetBlockBlobReference($"{decoratedMessage.MessageProperties.RequestCorrelationId}.json");

    ITextAnalyticsClient client = new TextAnalyticsClient(new ApiKeyServiceClientCredentials())
    {
        Endpoint = Environment.GetEnvironmentVariable("textAnalyticsEndpoint")
    };

    log.LogInformation($"[Request Correlation ID: {decoratedMessage.MessageProperties.RequestCorrelationId}] :: Beginning entity extraction");

    EntitiesBatchResult entityResult = client.EntitiesAsync(false,
        new MultiLanguageBatchInput(
            new List <MultiLanguageInput>()
            {
                new MultiLanguageInput("en", "0", decoratedMessage.verbatim)
            })).Result;

    string entitiesJson = JsonConvert.SerializeObject(entityResult.Documents[0].Entities);

    try
    {
        // BUG FIX: the upload task was fire-and-forget, so failures never
        // reached the catch block and the function could complete before the
        // blob was written. Block until the upload finishes so exceptions
        // surface here.
        blob.UploadTextAsync(entitiesJson).GetAwaiter().GetResult();
        log.LogInformation($"[Request Correlation ID: {decoratedMessage.MessageProperties.RequestCorrelationId}] :: Completed entity extraction :: TBC items extracted");
    }
    catch (Exception ex)
    {
        log.LogInformation($"[Request Correlation ID: {decoratedMessage.MessageProperties.RequestCorrelationId}] :: Incomplete entity extraction :: {ex.Message}");
    }
}
/// <summary>
/// Writes one accumulated tab-separated output file (Item{itemID}.tsv in the
/// configured output folder) containing a header row plus one row per entity
/// match across all documents in the batch result.
/// </summary>
/// <param name="entitiesResult">Entity recognition results to write out.</param>
/// <param name="itemID">Identifier used in each row and in the output file name.</param>
private static void WriteOutput(EntitiesBatchResult entitiesResult, string itemID)
{
    List<string> outputLines = new List<string>();

    // Add the header for the output file
    outputLines.Add("ItemID\tSeq\tPageID\tName\tType\tSubType\tWikipediaID\tWikipediaLanguage\tWikipediaUrl\tOffset\tLength\tScore\tWikipediaScore\tIsScientificName");

    int docCount = 0;
    int sequence = 1;
    foreach (var document in entitiesResult.Documents)
    {
        foreach (var entity in document.Entities)
        {
            // Read the data to be included in the output file; newlines in the
            // entity name would break the row format, so flatten them.
            string pageID = document.Id;
            string eName = entity.Name.Replace('\n', ' ').Replace('\r', ' ');
            string eType = entity.Type ?? "N/A";
            string eSubType = entity.SubType ?? "N/A";
            string eWikipediaId = entity.WikipediaId ?? "N/A";
            string eWikipediaLanguage = entity.WikipediaLanguage ?? "N/A";
            string eWikipediaUrl = entity.WikipediaUrl ?? "N/A";

            foreach (var match in entity.Matches)
            {
                // Determine if the entity is a scientific name
                string isName = IsSciName(entity) ? "True" : "False";

                // Build the row (the redundant string.Format wrapper around an
                // already-interpolated string has been removed).
                string outputLine = $"{itemID}\t{sequence}\t{pageID}\t{eName}\t{eType}\t{eSubType}\t{eWikipediaId}\t{eWikipediaLanguage}\t{eWikipediaUrl}\t{match.Offset}\t{match.Length}\t{match.EntityTypeScore:F3}\t{match.WikipediaScore:F3}\t{isName}";
                outputLines.Add(outputLine);
                sequence++;
            }
        }
        docCount++;
        Console.WriteLine($"{docCount} documents processed");
    }

    // CreateDirectory is a no-op when the folder already exists, so no
    // Directory.Exists check is needed.
    Directory.CreateDirectory(Config.OutputFolder);

    // Path.Combine instead of a hand-built "\\" path keeps this portable.
    File.WriteAllLines(Path.Combine(Config.OutputFolder, $"Item{itemID}.tsv"), outputLines.ToArray(), Encoding.UTF8);
}
/// <summary>
/// Verifies the synchronous Entities convenience overload against a recorded
/// HTTP session: the sample sentence should yield the "Microsoft" entity
/// with a known Bing id and match scores.
/// </summary>
public void Entities()
{
    using (MockContext context = MockContext.Start(this.GetType().FullName))
    {
        // Replays the "Entities" recorded session instead of hitting the live service.
        HttpMockServer.Initialize(this.GetType().FullName, "Entities");
        ITextAnalyticsClient client = GetClient(HttpMockServer.CreateInstance());
        EntitiesBatchResult result = client.Entities(
            "Microsoft released Windows 10");
        // Expected values come from the recorded response payload.
        Assert.Equal("Microsoft", result.Documents[0].Entities[0].Name);
        Assert.Equal("a093e9b9-90f5-a3d5-c4b8-5855e1b01f85", result.Documents[0].Entities[0].BingId);
        Assert.Equal("Microsoft", result.Documents[0].Entities[0].Matches[0].Text);
        Assert.Equal(0.12508682244047509, result.Documents[0].Entities[0].Matches[0].WikipediaScore);
        Assert.Equal(0.99999618530273438, result.Documents[0].Entities[0].Matches[0].EntityTypeScore);
        context.Stop();
    }
}
/// <summary>
/// Sends a fixed English sample sentence to the Text Analytics entity
/// recognition endpoint, prints each detected entity to the console, and
/// returns the entity names.
/// See https://docs.microsoft.com/en-us/azure/cognitive-services/Text-Analytics/quickstarts/csharp
/// </summary>
/// <returns>Names of the entities recognized in the sample sentence.</returns>
public static List<string> analyseEntities()
{
    // Create a client pointed at West Europe; change the endpoint to match
    // the region of your Text Analytics subscription.
    ITextAnalyticsClient client = new TextAnalyticsClient(new ApiKeyServiceClientCredentials())
    {
        Endpoint = "https://westeurope.api.cognitive.microsoft.com"
    };

    Console.OutputEncoding = System.Text.Encoding.UTF8;

    // Identify entities in the sample document.
    Console.WriteLine("\n\n===== ENTITIES ======");
    EntitiesBatchResult batchResult = client.EntitiesAsync(
        new MultiLanguageBatchInput(
            new List<MultiLanguageInput>()
            {
                new MultiLanguageInput("en", "0", "The Great Depression began in 1929. By 1933, the GDP in America fell by 25%.")
            })).Result;

    // Print and collect the recognized entity names.
    var recognizedNames = new List<string>();
    foreach (var document in batchResult.Documents)
    {
        Console.WriteLine("Document ID: {0} ", document.Id);
        Console.WriteLine("\t Entities:");
        foreach (EntityRecord entity in document.Entities)
        {
            Console.WriteLine("\t\t" + entity.Name);
            recognizedNames.Add(entity.Name);
        }
    }

    return recognizedNames;
}
/// <summary>
/// Runs entity recognition on a single English text and prints each entity
/// name with its Wikipedia URL.
/// </summary>
/// <param name="documentid">Caller-supplied identifier; combined with the text to form the batch document id.</param>
/// <param name="text">The English text to analyze.</param>
private static void ProcessEntities(string documentid, string text)
{
    ITextAnalyticsAPI client = new TextAnalyticsAPI(new ApiKeyServiceClientCredentials("key here"));
    client.AzureRegion = AzureRegions.Westeurope;

    // NOTE(review): the document id embeds the full text ("id:text"); the
    // service constrains id length, so long texts may be rejected — confirm.
    EntitiesBatchResult batchResult = client.EntitiesAsync(new MultiLanguageBatchInput(
        new List<MultiLanguageInput>()
        {
            new MultiLanguageInput("en", documentid + ":" + text, text)
        })).Result;

    foreach (var document in batchResult.Documents)
    {
        Console.WriteLine("Document ID: {0} ", document.Id);
        Console.WriteLine("\t Entities:");
        foreach (EntityRecord entity in document.Entities)
        {
            Console.WriteLine("\t\t" + entity.Name + " " + entity.WikipediaUrl);
        }
    }
}
/// <summary>
/// Runs the full Text Analytics pipeline (language detection, key phrases,
/// sentiment, entities) over a batch of crime tweets, persists the combined
/// results through the DB service, and returns the per-tweet results.
/// </summary>
/// <param name="crimeTweets">Tweets to analyze; each tweet's id becomes its document id.</param>
/// <returns>One SentimentResults per tweet with language, key phrases, sentiment score and entities populated.</returns>
public static List<SentimentResults> fullAnalysis(List<DB_Service.CrimeTweets> crimeTweets)
{
    // Create a client pointed at the West Europe region.
    ITextAnalyticsClient client = new TextAnalyticsClient(new ApiKeyServiceClientCredentials())
    {
        Endpoint = "https://westeurope.api.cognitive.microsoft.com"
    };

    // ---- Language detection ------------------------------------------------
    List<Input> myInp = new List<Input>();
    foreach (DB_Service.CrimeTweets ct in crimeTweets)
    {
        myInp.Add(new Input(ct.tweet_id.ToString(), ct.message));
    }
    var result = client.DetectLanguageAsync(new BatchInput(myInp)).Result;

    // NOTE(review): all the loops below pair result documents with tweets by
    // position; this assumes the service returns documents in request order.
    List<SentimentResults> tweetLangs = new List<SentimentResults>();
    foreach (var document in result.Documents)
    {
        SentimentResults sr = new SentimentResults();
        sr.setTweet_id(Int32.Parse(document.Id));
        sr.setLanguage_short(document.DetectedLanguages[0].Iso6391Name);
        sr.setLanguage(document.DetectedLanguages[0].Name);
        tweetLangs.Add(sr);
    }

    // ---- Key phrases (inputs tagged with each tweet's detected language) ----
    List<MultiLanguageInput> keyPhrases = new List<MultiLanguageInput>();
    int count = 0;
    foreach (DB_Service.CrimeTweets ct in crimeTweets)
    {
        string tempLang = tweetLangs.ElementAt<SentimentResults>(count).getLanguage_short();
        keyPhrases.Add(new MultiLanguageInput(tempLang, ct.tweet_id.ToString(), ct.message));
        count++;
    }
    KeyPhraseBatchResult result2 = client.KeyPhrasesAsync(new MultiLanguageBatchInput(keyPhrases)).Result;

    List<SentimentResults> tweetKeyPhrases = new List<SentimentResults>();
    count = 0;
    foreach (var document in result2.Documents)
    {
        // BUG FIX: the phrase list used to be created once outside this loop,
        // so every tweet shared one accumulating list containing all tweets'
        // phrases. Each document now gets its own list.
        List<string> phrases = new List<string>();
        foreach (string keyphrase in document.KeyPhrases)
        {
            phrases.Add(keyphrase);
        }
        SentimentResults sr = tweetLangs.ElementAt<SentimentResults>(count);
        sr.setKeyPhrases(phrases);
        tweetKeyPhrases.Add(sr);
        count++;
    }

    // ---- Sentiment ---------------------------------------------------------
    List<MultiLanguageInput> sentiAni = new List<MultiLanguageInput>();
    count = 0;
    foreach (DB_Service.CrimeTweets ct in crimeTweets)
    {
        string tempLang = tweetKeyPhrases.ElementAt<SentimentResults>(count).getLanguage_short();
        sentiAni.Add(new MultiLanguageInput(tempLang, ct.tweet_id.ToString(), ct.message));
        count++;
    }
    SentimentBatchResult result3 = client.SentimentAsync(new MultiLanguageBatchInput(sentiAni)).Result;

    List<SentimentResults> tweetSentiments = new List<SentimentResults>();
    count = 0;
    foreach (var document in result3.Documents)
    {
        SentimentResults sr = tweetKeyPhrases.ElementAt<SentimentResults>(count);
        sr.setSenti_score((double)document.Score);
        tweetSentiments.Add(sr);
        count++;
    }

    // ---- Entities (reuse sentiAni so the languages stay the same) ----------
    EntitiesBatchResult result4 = client.EntitiesAsync(new MultiLanguageBatchInput(sentiAni)).Result;

    List<SentimentResults> tweetEntities = new List<SentimentResults>();
    count = 0;
    foreach (var document in result4.Documents)
    {
        // BUG FIX: same shared-list defect as the key phrases above; each
        // document now gets its own entity list.
        List<string> entitySet = new List<string>();
        foreach (EntityRecord entity in document.Entities)
        {
            entitySet.Add(entity.Name);
        }
        SentimentResults sr = tweetSentiments.ElementAt<SentimentResults>(count);
        sr.setEntities(entitySet);
        tweetEntities.Add(sr);
        count++;
    }

    // ---- Persist through the DB service ------------------------------------
    List<DB_Service.Sentiments> completeSentiments = new List<DB_Service.Sentiments>();
    foreach (SentimentResults finalResults in tweetEntities)
    {
        DB_Service.Sentiments newSenti = new DB_Service.Sentiments();
        newSenti.tweet_id = finalResults.getTweet_id();
        newSenti.sentiment_total = finalResults.getSenti_score();
        newSenti.category_primary = finalResults.getLanguage() + ", " + finalResults.getLanguage_short();

        // key_phrases holds the entity names followed by the key phrases,
        // comma separated (entities keep a trailing comma before the phrases,
        // matching the original output format).
        List<string> entList = finalResults.getEntities();
        List<string> phraseList = finalResults.getKeyPhrases();
        StringBuilder wholePhrase = new StringBuilder("");
        count = 0;
        if (entList != null && entList.Count > 0)
        {
            foreach (string entity in entList)
            {
                wholePhrase.Append(entity + ",");
            }
        }
        if (phraseList != null && phraseList.Count > 0)
        {
            foreach (string word in phraseList)
            {
                count++;
                if (phraseList.Count > count)
                {
                    wholePhrase.Append(word + ",");
                }
                else
                {
                    wholePhrase.Append(word);
                }
            }
        }
        newSenti.key_phrases = wholePhrase.ToString();
        completeSentiments.Add(newSenti);
    }

    DB_Service.ServiceClient service = new DB_Service.ServiceClient();
    service.addSentiments(completeSentiments);
    return tweetEntities;
}
/// <summary>
/// Quickstart demo: exercises language detection, key-phrase extraction,
/// sentiment analysis and entity recognition with fixed sample documents,
/// printing each result set to the console.
/// </summary>
static void Main(string[] args)
{
    // Create a client.
    ITextAnalyticsClient client = new TextAnalyticsClient(new ApiKeyServiceClientCredentials())
    {
        Endpoint = "https://westus.api.cognitive.microsoft.com"
    }; //Replace 'westus' with the correct region for your Text Analytics subscription

    // UTF-8 so non-Latin sample text (Chinese, Japanese) renders correctly.
    Console.OutputEncoding = System.Text.Encoding.UTF8;

    // Extracting language
    Console.WriteLine("===== LANGUAGE EXTRACTION ======");
    var result = client.DetectLanguageAsync(new BatchInput(
        new List <Input>()
        {
            new Input("1", "This is a document written in English."),
            new Input("2", "Este es un document escrito en Español."),
            new Input("3", "这是一个用中文写的文件")
        })).Result;

    // Printing language results.
    foreach (var document in result.Documents)
    {
        Console.WriteLine("Document ID: {0} , Language: {1}", document.Id, document.DetectedLanguages[0].Name);
    }

    // Getting key-phrases
    Console.WriteLine("\n\n===== KEY-PHRASE EXTRACTION ======");
    KeyPhraseBatchResult result2 = client.KeyPhrasesAsync(new MultiLanguageBatchInput(
        new List <MultiLanguageInput>()
        {
            new MultiLanguageInput("ja", "1", "猫は幸せ"),
            new MultiLanguageInput("de", "2", "Fahrt nach Stuttgart und dann zum Hotel zu Fu."),
            new MultiLanguageInput("en", "3", "My cat is stiff as a rock."),
            new MultiLanguageInput("es", "4", "A mi me encanta el fútbol!")
        })).Result;

    // Printing keyphrases
    foreach (var document in result2.Documents)
    {
        Console.WriteLine("Document ID: {0} ", document.Id);
        Console.WriteLine("\t Key phrases:");
        foreach (string keyphrase in document.KeyPhrases)
        {
            Console.WriteLine("\t\t" + keyphrase);
        }
    }

    // Extracting sentiment
    Console.WriteLine("\n\n===== SENTIMENT ANALYSIS ======");
    SentimentBatchResult result3 = client.SentimentAsync(
        new MultiLanguageBatchInput(
            new List <MultiLanguageInput>()
            {
                new MultiLanguageInput("en", "0", "I had the best day of my life."),
                new MultiLanguageInput("en", "1", "This was a waste of my time. 
The speaker put me to sleep."),
                new MultiLanguageInput("es", "2", "No tengo dinero ni nada que dar..."),
                new MultiLanguageInput("it", "3", "L'hotel veneziano era meraviglioso. È un bellissimo pezzo di architettura."),
            })).Result;

    // Printing sentiment results
    foreach (var document in result3.Documents)
    {
        Console.WriteLine("Document ID: {0} , Sentiment Score: {1:0.00}", document.Id, document.Score);
    }

    // Identify entities
    Console.WriteLine("\n\n===== ENTITIES ======");
    EntitiesBatchResult result4 = client.EntitiesAsync(
        new MultiLanguageBatchInput(
            new List <MultiLanguageInput>()
            {
                new MultiLanguageInput("en", "0", "The Great Depression began in 1929. By 1933, the GDP in America fell by 25%.")
            })).Result;

    // Printing entities results
    foreach (var document in result4.Documents)
    {
        Console.WriteLine("Document ID: {0} ", document.Id);
        Console.WriteLine("\t Entities:");
        foreach (EntityRecord entity in document.Entities)
        {
            Console.WriteLine("\t\t" + entity.Name);
        }
    }

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
/// <summary>
/// Interactive console loop: reads a line of text from stdin and prints its
/// sentiment score, key phrases and entities, until the user types "quit"
/// (or stdin is closed).
/// </summary>
static void Main(string[] args)
{
    // Create a client.
    TextAnalyticsAPI client = new TextAnalyticsAPI(new ApiKeyServiceClientCredentials())
    {
        AzureRegion = AzureRegions.Eastus
    };

    Console.OutputEncoding = System.Text.Encoding.UTF8;
    Console.WriteLine("===== MS_CA_TEXT_ANALYSIS ======");
    Console.WriteLine("==== KEY PHRASE & SENTIMENT ====");
    Console.WriteLine("Type 'quit' to exit application");

    string text;
    while (true)
    {
        Console.WriteLine("\nText to Analyze:");
        text = Console.ReadLine();

        // BUG FIX: the original do/while analyzed the input BEFORE testing it,
        // so typing "quit" still sent "quit" through three API calls. Also
        // exit on null (stdin closed) to avoid looping forever on EOF.
        if (text == null || text == "quit")
        {
            break;
        }

        //** GATHER (POST)
        //Key Phrase(s)
        KeyPhraseBatchResult result_k = client.KeyPhrasesAsync(new MultiLanguageBatchInput(
            new List<MultiLanguageInput>()
            {
                new MultiLanguageInput("en", "3", text),
            })).Result;
        //Sentiment
        SentimentBatchResult result_s = client.SentimentAsync(new MultiLanguageBatchInput(
            new List<MultiLanguageInput>()
            {
                new MultiLanguageInput("en", "3", text),
            })).Result;
        //Entities
        EntitiesBatchResult result_e = client.EntitiesAsync(new MultiLanguageBatchInput(
            new List<MultiLanguageInput>()
            {
                new MultiLanguageInput("en", "3", text),
            })).Result;

        //** PRINT (RESPONSE)
        //Sentiment
        foreach (var document in result_s.Documents)
        {
            Console.WriteLine("\n Sentiment: ");
            Console.WriteLine("\t " + Math.Round(document.Score.Value, 3, MidpointRounding.AwayFromZero));
        }
        //Key Phrase(s)
        foreach (var document in result_k.Documents)
        {
            Console.WriteLine("\n Key Phrases:");
            foreach (string keyphrase in document.KeyPhrases)
            {
                Console.WriteLine("\t " + keyphrase);
            }
        }
        //Entities
        foreach (var document in result_e.Documents)
        {
            Console.WriteLine("\n Entities:");
            foreach (var entity in document.Entities)
            {
                Console.WriteLine("\t " + entity.Name + " (" + entity.WikipediaUrl + ")");
            }
        }
    }
}
/// <summary>
/// Text Analytics - V2 - Key Phrases & Entities.
/// Sends the supplied (language, text) pairs to the key-phrase and entity
/// endpoints in batches of up to 100 documents and merges the batch results.
/// </summary>
/// <param name="keyPhrasesSamples">Pairs of (language code, text) to analyze; the list index becomes the document id.</param>
/// <param name="cognitiveServicesApiCalls">Counter object; ApiCallV2Count is incremented once per API request made.</param>
/// <returns>Merged key-phrase and entity batch results.</returns>
public static Tuple <KeyPhraseBatchResult, EntitiesBatchResult> TextAnalyticsKeyPhrasesAndEntities(List <KeyValuePair <string, string> > keyPhrasesSamples, ref CognitiveServicesApiCalls cognitiveServicesApiCalls)
{
    var creds = new ApiKeyServiceClientCredentials();

    // Build client API call
    ITextAnalyticsClient client = new TextAnalyticsClient(creds)
    {
        Endpoint = Config.COGNITIVE_SERVICES_REGION_URI
    };

    // Report the total character count being sent.
    var lengthofText = keyPhrasesSamples.Select((v, i) => v.Value.ToString().Length).Sum();
    Console.WriteLine(string.Format("\tCharacters: {0}", lengthofText));

    var multiLanguageInputs = keyPhrasesSamples
        .Select((v, i) => new MultiLanguageInput(v.Key, i.ToString(), v.Value))
        .ToList();

    // The service accepts at most 100 documents per request, so process in
    // batches. Ceiling division avoids the trailing empty batch the old
    // "count / 100 + 1" produced when the count was an exact multiple of 100.
    // (Unused locals from the original — the placeholder tuple and the joined
    // input string — have been removed.)
    int batches = (multiLanguageInputs.Count + 99) / 100;

    var keyPhraseBatchResults = new List<KeyPhraseBatchResult>();
    var entityBatchResults = new List<EntitiesBatchResult>();
    for (int i = 0; i != batches; i++)
    {
        // set up the batches
        var multiLanguageInputsToProcess = multiLanguageInputs.Skip(i * 100).Take(100).ToList();
        if (multiLanguageInputsToProcess.Count > 0)
        {
            Console.WriteLine(string.Format("\tProcessing Batch {0} of {1}", (i + 1), batches));

            // key phrases result
            var keyPhraseMiniBatchResult = client.KeyPhrasesAsync(true, new MultiLanguageBatchInput(multiLanguageInputsToProcess)).Result;
            keyPhraseBatchResults.Add(keyPhraseMiniBatchResult);
            cognitiveServicesApiCalls.ApiCallV2Count++;

            // entities result
            var entitiesMiniBatchResult = client.EntitiesAsync(true, new MultiLanguageBatchInput(multiLanguageInputsToProcess)).Result;
            entityBatchResults.Add(entitiesMiniBatchResult);
            cognitiveServicesApiCalls.ApiCallV2Count++;
        }
    }

    // Merge the per-batch documents and errors back into single results.
    var keyPhraseDocuments = keyPhraseBatchResults.SelectMany(r => r.Documents).ToList();
    var keyPhraseErrors = keyPhraseBatchResults.SelectMany(r => r.Errors).ToList();
    var keyPhraseBatchResult = new KeyPhraseBatchResult(keyPhraseDocuments, keyPhraseErrors);

    var entitiesDocuments = entityBatchResults.SelectMany(r => r.Documents).ToList();
    var entitiesErrors = entityBatchResults.SelectMany(r => r.Errors).ToList();
    var entitiesBatchResult = new EntitiesBatchResult(entitiesDocuments, entitiesErrors);

    return new Tuple <KeyPhraseBatchResult, EntitiesBatchResult>(keyPhraseBatchResult, entitiesBatchResult);
}