コード例 #1
0
        /// <summary>
        /// Blob-triggered entry point: extracts the pages of an uploaded PDF, runs key-phrase
        /// analysis over those pages, and uploads every page to Azure Search.
        /// Blobs whose extension is not "pdf" (compared case-insensitively) are ignored.
        /// </summary>
        /// <param name="myBlob">Content stream of the blob that fired the trigger.</param>
        /// <param name="name">The <c>{name}</c> segment of the trigger path (blob name without extension).</param>
        /// <param name="ext">The <c>{ext}</c> segment of the trigger path (blob extension).</param>
        /// <param name="log">Trace writer that receives the progress messages.</param>
        /// <returns>A task that completes once every page has been uploaded.</returns>
        public static async Task Run(
            [BlobTrigger("searchabledocuments/{name}.{ext}", Connection = "AzureWebJobsStorage")] Stream myBlob,
            string name, string ext, TraceWriter log)
        {
            // Because suffix filters don't work yet - this should take non-pdfs off the todo list.
            // An ordinal, case-insensitive comparison avoids culture-dependent lower-casing and
            // also treats a null extension as "not a PDF" instead of throwing.
            if (!string.Equals(ext, "pdf", System.StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            log.Info($"Text Processing beginning for {name} ({myBlob.Length} Bytes)");

            log.Info("Extracting text from the PDF (including OCR");
            var pages = iTextPDFHelper.GetPDFPages(myBlob, log, ocrImages: true);

            log.Info("Calling Text Analytics to determine key phrases");
            // The returned value is not needed here; the upload below reads page.KeyPhrases.
            // NOTE(review): presumably this call fills in KeyPhrases on each page - confirm
            // in TextAnalyticsHelper.
            await TextAnalyticsHelper.GetKeyPhrases(pages, log);

            log.Info("Uploading document to Azure Search");

            // The full file name is the same for every page, so build it once.
            string fileName = name + "." + ext;
            foreach (var page in pages)
            {
                // The page id is the URL-token encoding of "<file name><page number>".
                string pageId = HttpServerUtility.UrlTokenEncode(Encoding.UTF8.GetBytes(fileName + page.Number));
                await AzureSearchHelper.UploadToAzureSearch(pageId, fileName, page.Number, page.KeyPhrases, page.Text, log);
            }
        }