Пример #1
0
        /// <summary>
        /// Blob-triggered entry point. Runs text analytics on the uploaded document, classifies it as a
        /// resume or another document type based on the configured keywords, runs computer-vision analysis
        /// on every image extracted from the document, and finally upserts the resulting model.
        /// </summary>
        /// <param name="myBlob">Content stream of the blob that fired the trigger.</param>
        /// <param name="name">Blob name bound from the trigger path <c>resumeuploadcontainer/{name}</c>.</param>
        /// <param name="uri">URI of the blob; its <c>AbsoluteUri</c> is stored on the document model.</param>
        /// <param name="log">Trace writer used for progress messages.</param>
        /// <returns>A task that completes when the document has been processed and stored.</returns>
        public static async Task RunAsync([BlobTrigger("resumeuploadcontainer/{name}", Connection = "AzureWebJobsStorage")] Stream myBlob, string name, Uri uri, TraceWriter log)
        {
            log.Info($"C# Blob trigger function Processed blob\n Name:{name} \n Size: {myBlob.Length} Bytes");

            // NOTE: the document name is the text before the FIRST '.', whereas the extension is the text
            // after the LAST '.'; a blob called "a.b.pdf" therefore gets the document name "a".
            string[] nameParts    = name.Split('.');
            string   documentName = nameParts[0];
            string   extension    = nameParts[nameParts.Length - 1];

            // Guard clause: unsupported extensions are only reported, never processed.
            if (!IsValidExtension(extension))
            {
                log.Info("Please upload the valid Document in " + string.Join(",", _validExtensions) + " Extension");
                return;
            }

            PdfReader      pdfReader      = new PdfReader(myBlob);
            ResumeDocModel resumeDocModel = new ResumeDocModel
            {
                DocumentName = documentName,
                DocumentUri  = uri.AbsoluteUri
            };

            // Fills the language / key-phrase / entity results on the model.
            CognitiveTextAnalytics.TextAnalytics(pdfReader, log, resumeDocModel);

            // Blank keywords (e.g. produced by a trailing or doubled comma) are skipped: IndexOf("") is
            // always 0, so an empty keyword would classify every document as a resume.
            IEnumerable<string> resumeKeywords = ResumeKeyWords
                .Split(new string[] { "," }, StringSplitOptions.None)
                .Where(keyword => !string.IsNullOrWhiteSpace(keyword));

            // The document is a resume when any key phrase contains any keyword (case-insensitive).
            bool isResumeDoc = resumeKeywords.Any(keyword =>
                resumeDocModel.keyPhraseBatchResult.KeyPhrases.Any(
                    phrase => phrase.IndexOf(keyword, StringComparison.OrdinalIgnoreCase) >= 0));

            resumeDocModel.docType = isResumeDoc ? DocType.Resume : DocType.Others;

            List<CloudBlockBlob> cloudBlocks = DocumentExtraction.ExtractImageUploadToAzure(pdfReader, myBlob, log, documentName, resumeDocModel);

            if (cloudBlocks.Count > 0)
            {
                log.Info("===== Computer Vision Analysis Started ======");
                ImageBatchResult imageBatchResult = new ImageBatchResult();

                // Images are analysed one after another; every call receives the same batch result object.
                foreach (CloudBlockBlob cloudBlockBlob in cloudBlocks)
                {
                    string blobUrlWithSAS = AzStorage.GetBlobSasUri(cloudBlockBlob);
                    await CognitiveComputerVision.VisionAnalyticsAsync(log, blobUrlWithSAS, imageBatchResult);
                }

                resumeDocModel.imageBatchResult = imageBatchResult;
                log.Info("===== Computer Vision Analysis Completed ======");
            }
            else
            {
                log.Info("The Document doesn't not have any Images to analyze");
            }

            CosmosDB cosmosDB = new CosmosDB();

            // Awaited instead of blocked on with .Wait(): blocking inside an async method ties up a
            // thread-pool thread and wraps failures in AggregateException.
            await cosmosDB.CreateDocumentDB();
            await cosmosDB.UpdInsResumeDocumentAsync(resumeDocModel);
        }
        /// <summary>
        /// Extracts the text of a PDF, detects its language from the first text chunk, and collects the
        /// key phrases and entities of every chunk into <paramref name="resumeDocModel"/>.
        /// </summary>
        /// <param name="pdfReader">Reader over the PDF whose text is analysed.</param>
        /// <param name="log">Trace writer used for progress messages.</param>
        /// <param name="resumeDocModel">
        /// Model that receives the results. Its <c>DocumentName</c> is used as the document id of every
        /// request, and its <c>keyPhraseBatchResult</c> / <c>entityBatchResult</c> members are written to.
        /// </param>
        /// <remarks>
        /// The method is synchronous and blocks on the service calls with <c>.Result</c>; this is kept
        /// because the <c>void</c> signature is part of the method's interface.
        /// </remarks>
        public static void TextAnalytics(PdfReader pdfReader, TraceWriter log, ResumeDocModel resumeDocModel)
        {
            // Create a client that targets the configured Text Analytics endpoint.
            ITextAnalyticsClient client = new TextAnalyticsClient(new ApiKeyServiceClientCredentialsForText())
            {
                Endpoint = TextAnalyticsEndPoint
            };

            Console.OutputEncoding = System.Text.Encoding.UTF8;

            log.Info("===== Text Analytics Started ======");
            string content = DocumentExtraction.GetTextFromPDF(pdfReader);

            // The text is processed in chunks of at most MaxLengthofCharacters characters.
            List<string> splittedList = StringExtensions.Split(content, MaxLengthofCharacters).ToList();

            List<string>             keyPhraseList = new List<string>();
            List<EntityRecordV2dot1> entityRecords = new List<EntityRecordV2dot1>();

            if (splittedList.Count == 0)
            {
                // Nothing to send to the service; previously First() threw InvalidOperationException here.
                log.Info("No text content was extracted from the document; skipping Text Analytics calls");
            }
            else
            {
                // Language detection only looks at the first chunk.
                var languageDetection = client.DetectLanguageAsync(new BatchInput(
                    new List<Input>()
                    {
                        new Input(resumeDocModel.DocumentName, splittedList[0])
                    })).Result;

                resumeDocModel.languageBatchResult = languageDetection.Documents.FirstOrDefault();

                // Null when the service returned no document or no detected language.
                string detectedLanguage = languageDetection.Documents
                    .Select(doc => doc.DetectedLanguages?.FirstOrDefault()?.Iso6391Name)
                    .FirstOrDefault();

                // NOTE: sentiment analysis (client.SentimentAsync) is currently not performed.

                foreach (string splittedContent in splittedList)
                {
                    KeyPhraseBatchResult keyPhraseBatch = client.KeyPhrasesAsync(new MultiLanguageBatchInput(
                        new List<MultiLanguageInput>()
                        {
                            new MultiLanguageInput(detectedLanguage, resumeDocModel.DocumentName, splittedContent)
                        })).Result;

                    foreach (var doc in keyPhraseBatch.Documents)
                    {
                        keyPhraseList.AddRange(doc.KeyPhrases);
                    }

                    EntitiesBatchResultV2dot1 entitiesBatch = client.EntitiesAsync(new MultiLanguageBatchInput(
                        new List<MultiLanguageInput>()
                        {
                            new MultiLanguageInput(detectedLanguage, resumeDocModel.DocumentName, splittedContent)
                        })).Result;

                    // Iterate like the key-phrase results instead of calling First(), which threw when
                    // the response contained no documents.
                    foreach (var doc in entitiesBatch.Documents)
                    {
                        entityRecords.AddRange(doc.Entities);
                    }
                }
            }

            resumeDocModel.keyPhraseBatchResult.Id         = resumeDocModel.DocumentName;
            resumeDocModel.keyPhraseBatchResult.KeyPhrases = keyPhraseList;

            resumeDocModel.entityBatchResult.Id            = resumeDocModel.DocumentName;
            resumeDocModel.entityBatchResult.EntityRecords = entityRecords;

            log.Info("===== Text Analytics Completed ======");
        }