Class to hold result of Key Phrases call
示例#1
0
        /// <summary>
        /// Sends one document describing a processed file to the search index.
        /// Despite the plural name, the batch built here always holds exactly one upload action.
        /// </summary>
        /// <param name="indexClient">Client whose <c>Documents.Index</c> call receives the batch.</param>
        /// <param name="fileId">Value stored in the document's "fileId" field.</param>
        /// <param name="fileName">Value stored in the document's "fileName" field.</param>
        /// <param name="ocrText">Value stored in the document's "ocrText" field.</param>
        /// <param name="keyPhraseResult">Result whose <c>KeyPhrases</c> are copied into the "keyPhrases" field.</param>
        public static void UploadDocuments(SearchIndexClient indexClient, string fileId, string fileName, string ocrText, KeyPhraseResult keyPhraseResult)
        {
            // Assemble the single document; the key phrases are copied into a fresh list.
            var document = new Document
            {
                { "fileId", fileId },
                { "fileName", fileName },
                { "ocrText", ocrText },
                { "keyPhrases", keyPhraseResult.KeyPhrases.ToList() }
            };
            var batchActions = new List<IndexAction> { IndexAction.Upload(document) };

            try
            {
                indexClient.Documents.Index(new IndexBatch(batchActions));
            }
            catch (IndexBatchException batchError)
            {
                // Indexing can fail for some documents of a batch, e.g. when the Search service is under
                // load. An application could delay and retry; this simple demo only logs the keys of the
                // documents that failed and carries on. Any other exception type propagates to the caller.
                var failedKeys = batchError.IndexingResults
                    .Where(result => !result.Succeeded)
                    .Select(result => result.Key);
                Console.WriteLine("Failed to index some of the documents: {0}", String.Join(", ", failedKeys));
            }
        }
示例#2
0
        /// <summary>
        /// Console entry point. Creates the search index, then for every PDF directly inside the
        /// "pdf" folder: extracts its images into the "image" folder, runs OCR over every file in
        /// that folder (deleting each file afterwards), extracts key phrases from the combined text
        /// and uploads the result to the index. Finishes with a test search.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            var searchPath = "pdf";
            var outPath    = "image";

            // Note, this will create a new Azure Search Index for the OCR text
            Console.WriteLine("Creating Azure Search index...");
            AzureSearch.CreateIndex(serviceClient, indexName);
            // Creating an image directory
            if (!Directory.Exists(outPath))
            {
                Directory.CreateDirectory(outPath);
            }
            foreach (var filename in Directory.GetFiles(searchPath, "*.pdf", SearchOption.TopDirectoryOnly))
            {
                Console.WriteLine("Extracting images from {0} \r\n", System.IO.Path.GetFileName(filename));
                var images = PdfImageExtractor.ExtractImages(filename);
                Console.WriteLine("{0} images found.", images.Count);
                Console.WriteLine();
                foreach (var name in images.Keys)
                {
                    // Only save images whose name does not end with '.', i.e. that carry an extension.
                    if (name.LastIndexOf(".") + 1 != name.Length)
                    {
                        images[name].Save(Path.Combine(outPath, name));
                    }
                }
                string ocrText = string.Empty;
                Console.WriteLine("Extracting text from image... \r\n");
                // Every file currently in the image folder is OCR'd and then removed, so the folder
                // is empty again before the next PDF is processed.
                foreach (var imagefilename in Directory.GetFiles(outPath))
                {
                    OcrResults ocr = vision.RecognizeText(imagefilename);
                    ocrText += vision.GetRetrieveText(ocr);
                    File.Delete(imagefilename);
                }
                // Take the resulting ocrText and upload to a new Azure Search Index.
                // It is highly recommended that you upload documents in batches rather than
                // individually like is done here.
                // The key phrase service is only called when OCR actually produced text: a request
                // with empty text is wasted, and a failure there would abort the whole run.
                if (ocrText.Length > 0)
                {
                    Console.WriteLine("Extracting key phrases from processed text... \r\n");
                    KeyPhraseResult keyPhraseResult = TextExtraction.ProcessText(ocrText);

                    Console.WriteLine("Uploading extracted text to Azure Search...\r\n");
                    string fileNameOnly = System.IO.Path.GetFileName(filename);
                    // Standard Base64 may contain '+' and '/', which Azure Search does not accept in
                    // document keys; map them to the URL-safe alphabet ('-' and '_'). Reverse the two
                    // replacements before decoding the id back to a file name.
                    // NOTE(review): assumes "fileId" is the index key (defined in AzureSearch.CreateIndex,
                    // not visible here) - confirm.
                    string fileId       = System.Convert.ToBase64String(System.Text.Encoding.UTF8.GetBytes(fileNameOnly))
                                                        .Replace('+', '-')
                                                        .Replace('/', '_');
                    AzureSearch.UploadDocuments(indexClient, fileId, fileNameOnly, ocrText, keyPhraseResult);
                }
            }
            // Execute a test search
            Console.WriteLine("Execute Search...");
            AzureSearch.SearchDocuments(indexClient, "Azure Search");
            Console.WriteLine("All done.  Press any key to continue.");
            Console.ReadLine();
        }
示例#3
0
        /// <summary>
        /// Builds a document from the given file details and uploads it to the search index
        /// as a one-element batch.
        /// </summary>
        /// <param name="indexClient">Search index client used to submit the batch.</param>
        /// <param name="fileId">Stored under the "fileId" field.</param>
        /// <param name="fileName">Stored under the "fileName" field.</param>
        /// <param name="ocrText">Stored under the "ocrText" field.</param>
        /// <param name="keyPhraseResult">Source of the list stored under the "keyPhrases" field.</param>
        public static void UploadDocuments(SearchIndexClient indexClient, string fileId, string fileName, string ocrText, KeyPhraseResult keyPhraseResult)
        {
            var uploadDoc = new Document();
            uploadDoc.Add("fileId", fileId);
            uploadDoc.Add("fileName", fileName);
            uploadDoc.Add("ocrText", ocrText);
            uploadDoc.Add("keyPhrases", keyPhraseResult.KeyPhrases.ToList());

            var pendingActions = new List<IndexAction>();
            pendingActions.Add(IndexAction.Upload(uploadDoc));
            var batch = new IndexBatch(pendingActions);

            try
            {
                indexClient.Documents.Index(batch);
            }
            catch (IndexBatchException ex)
            {
                // When the Search service is under load, indexing can fail for some of the documents
                // in a batch. Delaying and retrying would be a possible compensation; this simple demo
                // just reports the keys of the failed documents and continues.
                var failedKeys = from outcome in ex.IndexingResults
                                 where !outcome.Succeeded
                                 select outcome.Key;
                Console.WriteLine(
                    "Failed to index some of the documents: {0}",
                    String.Join(", ", failedKeys));
            }
        }
示例#4
0
        /// <summary>
        /// Requests the key phrases of <paramref name="inputText"/> from the text analytics service
        /// at <c>ServiceBaseUri</c> and writes them to the console.
        /// </summary>
        /// <param name="inputText">Text to analyze; it is URL-encoded and sent in the query string.</param>
        /// <returns>
        /// The deserialized key phrase result. When the response body deserializes to <c>null</c>,
        /// an empty <see cref="KeyPhraseResult"/> is returned instead, so the result is never <c>null</c>.
        /// </returns>
        /// <exception cref="HttpRequestException">The service answered with a non-success status code.</exception>
        /// <exception cref="AggregateException">The request or reading the response body faulted.</exception>
        public static KeyPhraseResult ProcessText(string inputText)
        {
            // Credentials come from the "textExtractionAccountKey" application setting.
            string          accountKey      = ConfigurationManager.AppSettings["textExtractionAccountKey"];
            // Fallback that is returned when the response body yields no object.
            KeyPhraseResult keyPhraseResult = new KeyPhraseResult();

            using (var httpClient = new HttpClient())
            {
                // NOTE(review): the whole text travels in the query string; very long OCR output may
                // exceed the URL length the service accepts - confirm against the service limits.
                string inputTextEncoded = HttpUtility.UrlEncode(inputText);
                httpClient.BaseAddress = new Uri(ServiceBaseUri);
                string creds = "AccountKey:" + accountKey;
                string authorizationHeader = "Basic " + Convert.ToBase64String(Encoding.ASCII.GetBytes(creds));
                httpClient.DefaultRequestHeaders.Add("Authorization", authorizationHeader);
                httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));

                // get key phrases
                string keyPhrasesRequest = "data.ashx/amla/text-analytics/v1/GetKeyPhrases?Text=" + inputTextEncoded;
                // Reading Result blocks until the task completes, so no separate Wait() is needed.
                Task <HttpResponseMessage> responseTask = httpClient.GetAsync(keyPhrasesRequest);
                HttpResponseMessage        response     = responseTask.Result;
                Task <string>              contentTask  = response.Content.ReadAsStringAsync();
                string                     content      = contentTask.Result;
                if (!response.IsSuccessStatusCode)
                {
                    throw new HttpRequestException("Call to get key phrases failed with HTTP status code: " +
                                                   response.StatusCode + " and contents: " + content);
                }

                // DeserializeObject can return null (e.g. for an empty body); keep the empty fallback then.
                KeyPhraseResult parsed = JsonConvert.DeserializeObject <KeyPhraseResult>(content);
                if (parsed != null)
                {
                    keyPhraseResult = parsed;
                }
                string joinedPhrases = keyPhraseResult.KeyPhrases == null
                    ? string.Empty
                    : string.Join(",", keyPhraseResult.KeyPhrases);
                Console.WriteLine("Key phrases: {0} \r\n", joinedPhrases);

                // Uncomment the following if you want to retrieve additional details on this text

                //// get sentiment
                //string sentimentRequest = "data.ashx/amla/text-analytics/v1/GetSentiment?Text=" + inputTextEncoded;
                //responseTask = httpClient.GetAsync(sentimentRequest);
                //response = responseTask.Result;
                //contentTask = response.Content.ReadAsStringAsync();
                //content = contentTask.Result;
                //if (!response.IsSuccessStatusCode)
                //{
                //    throw new HttpRequestException("Call to get sentiment failed with HTTP status code: " +
                //                                   response.StatusCode + " and contents: " + content);
                //}
                //SentimentResult sentimentResult = JsonConvert.DeserializeObject<SentimentResult>(content);
                //Console.WriteLine("Sentiment score: " + sentimentResult.Score);

                //// get the language in text
                //string languageRequest = "data.ashx/amla/text-analytics/v1/GetLanguage?Text=" + inputTextEncoded;
                //responseTask = httpClient.GetAsync(languageRequest);
                //response = responseTask.Result;
                //contentTask = response.Content.ReadAsStringAsync();
                //content = contentTask.Result;
                //if (!response.IsSuccessStatusCode)
                //{
                //    throw new HttpRequestException("Call to get language failed with HTTP status code: " +
                //                                   response.StatusCode + " and contents: " + content);
                //}
                //LanguageResult languageResult = JsonConvert.DeserializeObject<LanguageResult>(content);
                //Console.WriteLine("Detected Languages: " + string.Join(",", languageResult.DetectedLanguages.Select(language => language.Name).ToArray()));
            }
            return(keyPhraseResult);
        }
示例#5
0
        /// <summary>
        /// Uploads a single search document (fileId, fileName, ocrText, keyPhrases) to the index.
        /// Failures reported through <c>IndexBatchException</c> are logged to the console rather than rethrown.
        /// </summary>
        /// <param name="indexClient">Client used to submit the index batch.</param>
        /// <param name="fileId">Value of the "fileId" field.</param>
        /// <param name="fileName">Value of the "fileName" field.</param>
        /// <param name="ocrText">Value of the "ocrText" field.</param>
        /// <param name="keyPhraseResult">Provides the key phrases copied into the "keyPhrases" field.</param>
        public static void UploadDocuments(SearchIndexClient indexClient, string fileId, string fileName, string ocrText, KeyPhraseResult keyPhraseResult)
        {
            // One document per call; the batch therefore contains a single upload action.
            var searchDocument = new Document
            {
                { "fileId", fileId },
                { "fileName", fileName },
                { "ocrText", ocrText },
                { "keyPhrases", keyPhraseResult.KeyPhrases.ToList() }
            };

            var actions = new List<IndexAction>();
            actions.Add(IndexAction.Upload(searchDocument));

            try
            {
                indexClient.Documents.Index(new IndexBatch(actions));
            }
            catch (IndexBatchException indexingFailure)
            {
                // Report the keys of the documents that were not indexed and continue.
                var unsuccessful = indexingFailure.IndexingResults.Where(item => !item.Succeeded);
                var unsuccessfulKeys = unsuccessful.Select(item => item.Key);
                Console.WriteLine(
                    "Failed to index some of the documents: {0}",
                    String.Join(", ", unsuccessfulKeys));
            }
        }
        /// <summary>
        /// Requests the key phrases of <paramref name="inputText"/> from the text analytics service
        /// at <c>ServiceBaseUri</c> and writes them to the console.
        /// </summary>
        /// <param name="inputText">Text to analyze; it is URL-encoded and sent in the query string.</param>
        /// <returns>
        /// The deserialized key phrase result. When the response body deserializes to <c>null</c>,
        /// an empty <see cref="KeyPhraseResult"/> is returned instead, so the result is never <c>null</c>.
        /// </returns>
        /// <exception cref="HttpRequestException">The service answered with a non-success status code.</exception>
        /// <exception cref="AggregateException">The request or reading the response body faulted.</exception>
        public static KeyPhraseResult ProcessText(string inputText)
        {
            // Credentials come from the "textExtractionAccountKey" application setting.
            string accountKey = ConfigurationManager.AppSettings["textExtractionAccountKey"];
            // Fallback that is returned when the response body yields no object.
            KeyPhraseResult keyPhraseResult = new KeyPhraseResult();
            using (var httpClient = new HttpClient())
            {
                // NOTE(review): the whole text travels in the query string; very long OCR output may
                // exceed the URL length the service accepts - confirm against the service limits.
                string inputTextEncoded = HttpUtility.UrlEncode(inputText);
                httpClient.BaseAddress = new Uri(ServiceBaseUri);
                string creds = "AccountKey:" + accountKey;
                string authorizationHeader = "Basic " + Convert.ToBase64String(Encoding.ASCII.GetBytes(creds));
                httpClient.DefaultRequestHeaders.Add("Authorization", authorizationHeader);
                httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
                // get key phrases
                string keyPhrasesRequest = "data.ashx/amla/text-analytics/v1/GetKeyPhrases?Text=" + inputTextEncoded;
                // Reading Result blocks until the task completes, so no separate Wait() is needed.
                Task<HttpResponseMessage> responseTask = httpClient.GetAsync(keyPhrasesRequest);
                HttpResponseMessage response = responseTask.Result;
                Task<string> contentTask = response.Content.ReadAsStringAsync();
                string content = contentTask.Result;
                if (!response.IsSuccessStatusCode)
                {
                    throw new HttpRequestException("Call to get key phrases failed with HTTP status code: " +
                                                   response.StatusCode + " and contents: " + content);
                }

                // DeserializeObject can return null (e.g. for an empty body); keep the empty fallback then.
                KeyPhraseResult parsed = JsonConvert.DeserializeObject<KeyPhraseResult>(content);
                if (parsed != null)
                {
                    keyPhraseResult = parsed;
                }
                string joinedPhrases = keyPhraseResult.KeyPhrases == null
                    ? string.Empty
                    : string.Join(",", keyPhraseResult.KeyPhrases);
                Console.WriteLine("Key phrases: {0} \r\n", joinedPhrases);

                // Uncomment the following if you want to retrieve additional details on this text

                //// get sentiment
                //string sentimentRequest = "data.ashx/amla/text-analytics/v1/GetSentiment?Text=" + inputTextEncoded;
                //responseTask = httpClient.GetAsync(sentimentRequest);
                //response = responseTask.Result;
                //contentTask = response.Content.ReadAsStringAsync();
                //content = contentTask.Result;
                //if (!response.IsSuccessStatusCode)
                //{
                //    throw new HttpRequestException("Call to get sentiment failed with HTTP status code: " +
                //                                   response.StatusCode + " and contents: " + content);
                //}
                //SentimentResult sentimentResult = JsonConvert.DeserializeObject<SentimentResult>(content);
                //Console.WriteLine("Sentiment score: " + sentimentResult.Score);

                //// get the language in text
                //string languageRequest = "data.ashx/amla/text-analytics/v1/GetLanguage?Text=" + inputTextEncoded;
                //responseTask = httpClient.GetAsync(languageRequest);
                //response = responseTask.Result;
                //contentTask = response.Content.ReadAsStringAsync();
                //content = contentTask.Result;
                //if (!response.IsSuccessStatusCode)
                //{
                //    throw new HttpRequestException("Call to get language failed with HTTP status code: " +
                //                                   response.StatusCode + " and contents: " + content);
                //}
                //LanguageResult languageResult = JsonConvert.DeserializeObject<LanguageResult>(content);
                //Console.WriteLine("Detected Languages: " + string.Join(",", languageResult.DetectedLanguages.Select(language => language.Name).ToArray()));

            }
            return keyPhraseResult;
        }