Example #1
0
        /// <summary>
        /// Blob-trigger entry point for the "resumeuploadcontainer" container. For a blob whose
        /// extension passes <c>IsValidExtension</c> it runs text analytics on the PDF, tags the
        /// document as a resume (or not) from its key phrases, uploads the embedded images and
        /// runs computer-vision analysis on each, then persists the result through
        /// <c>CosmosDB.UpdInsResumeDocumentAsync</c>. Other blobs are only logged.
        /// </summary>
        /// <param name="myBlob">Content stream of the uploaded blob.</param>
        /// <param name="name">Blob name bound from the trigger path.</param>
        /// <param name="uri">URI of the uploaded blob; stored as the document's DocumentUri.</param>
        /// <param name="log">Function trace writer.</param>
        public static async Task RunAsync([BlobTrigger("resumeuploadcontainer/{name}", Connection = "AzureWebJobsStorage")] Stream myBlob, string name, Uri uri, TraceWriter log)
        {
            log.Info($"C# Blob trigger function Processed blob\n Name:{name} \n Size: {myBlob.Length} Bytes");

            // Split on the LAST dot only: "john.doe.pdf" must yield "john.doe" + "pdf".
            // Taking the first segment of name.Split('.') would map every "john.*.pdf"
            // onto the same document name and image folder.
            // When there is no dot at all, both values fall back to the whole name.
            int    extensionSeparator       = name.LastIndexOf('.');
            string extension                = name.Substring(extensionSeparator + 1);
            string filenamewithoutextension = extensionSeparator >= 0 ? name.Substring(0, extensionSeparator) : name;

            if (!IsValidExtension(extension))
            {
                log.Info("Please upload the valid Document in " + string.Join(",", _validExtensions) + " Extension");
                return;
            }

            PdfReader      pdfReader      = new PdfReader(myBlob);
            ResumeDocModel resumeDocModel = new ResumeDocModel {
                DocumentName = filenamewithoutextension, DocumentUri = uri.AbsoluteUri
            };

            // Populates the key-phrase / entity results on resumeDocModel.
            CognitiveTextAnalytics.TextAnalytics(pdfReader, log, resumeDocModel);

            // The document counts as a resume when any extracted key phrase contains
            // (case-insensitively) any of the configured comma-separated keywords.
            string[] resumeKeysArray = ResumeKeyWords.Split(new string[] { "," }, StringSplitOptions.None);
            bool     isResumeDoc     = resumeKeysArray.Any(
                key => resumeDocModel.keyPhraseBatchResult.KeyPhrases.Any(
                    phrase => phrase.IndexOf(key, StringComparison.OrdinalIgnoreCase) >= 0));
            resumeDocModel.docType = isResumeDoc ? DocType.Resume : DocType.Others;

            List<CloudBlockBlob> cloudBlocks = DocumentExtraction.ExtractImageUploadToAzure(pdfReader, myBlob, log, filenamewithoutextension, resumeDocModel);

            if (cloudBlocks.Count > 0)
            {
                log.Info("===== Computer Vision Analysis Started ======");
                ImageBatchResult imageBatchResult = new ImageBatchResult();
                foreach (CloudBlockBlob cloudBlockBlob in cloudBlocks)
                {
                    // The vision call is given a SAS URL for each uploaded image.
                    string blobUrlWithSAS = AzStorage.GetBlobSasUri(cloudBlockBlob);
                    await CognitiveComputerVision.VisionAnalyticsAsync(log, blobUrlWithSAS, imageBatchResult);
                }
                resumeDocModel.imageBatchResult = imageBatchResult;
                log.Info("===== Computer Vision Analysis Completed ======");
            }
            else
            {
                log.Info("The Document doesn't not have any Images to analyze");
            }

            CosmosDB cosmosDB = new CosmosDB();
            // Await instead of .Wait(): blocking inside an async method ties up a thread
            // and wraps failures in AggregateException.
            await cosmosDB.CreateDocumentDB();
            await cosmosDB.UpdInsResumeDocumentAsync(resumeDocModel);
        }
Example #2
0
        /// <summary>
        /// Creates the resume document in the collection, or replaces the existing document
        /// that has the same <c>DocumentName</c> (reusing that document's id).
        /// </summary>
        /// <param name="resumeDoc">Document to store; its <c>id</c> is overwritten when a match exists.</param>
        public async Task UpdInsResumeDocumentAsync(ResumeDocModel resumeDoc)
        {
            FeedOptions queryOptions = new FeedOptions {
                MaxItemCount = 1, EnableCrossPartitionQuery = true
            };

            var collectionUri = UriFactory.CreateDocumentCollectionUri(DatabaseId, CollectionId);

            // DocumentName is derived from the uploaded file name, so it must be passed as a
            // query parameter: concatenating it into the SQL text allows injection and breaks
            // on names that contain a single quote. Types are fully qualified to avoid
            // depending on (or clashing with) the file's using directives.
            var querySpec = new Microsoft.Azure.Documents.SqlQuerySpec(
                "SELECT * FROM Resume WHERE Resume.DocumentName = @documentName",
                new Microsoft.Azure.Documents.SqlParameterCollection
                {
                    new Microsoft.Azure.Documents.SqlParameter("@documentName", resumeDoc.DocumentName)
                });

            // Only the first match is needed; AsEnumerable() keeps FirstOrDefault on the
            // client side and stops enumerating after the first result.
            ResumeDocModel existing = this.client
                .CreateDocumentQuery<ResumeDocModel>(collectionUri, querySpec, queryOptions)
                .AsEnumerable()
                .FirstOrDefault();

            if (existing == null)
            {
                await this.client.CreateDocumentAsync(collectionUri, resumeDoc);
            }
            else
            {
                resumeDoc.id = existing.id;
                await this.client.ReplaceDocumentAsync(UriFactory.CreateDocumentUri(DatabaseId, CollectionId, existing.id), resumeDoc);
            }
        }
        /// <summary>
        /// Extracts the text of the PDF, splits it into chunks of at most
        /// <c>MaxLengthofCharacters</c>, detects the language from the first chunk and then
        /// collects key phrases and entities for every chunk into <paramref name="resumeDocModel"/>.
        /// A document without extractable text yields empty key-phrase and entity lists
        /// instead of an exception.
        /// </summary>
        /// <param name="pdfReader">Reader over the PDF to analyse.</param>
        /// <param name="log">Function trace writer.</param>
        /// <param name="resumeDocModel">
        /// Receives languageBatchResult, keyPhraseBatchResult and entityBatchResult; its
        /// DocumentName is used as the document id in every request.
        /// </param>
        public static void TextAnalytics(PdfReader pdfReader, TraceWriter log, ResumeDocModel resumeDocModel)
        {
            // Create a client.
            ITextAnalyticsClient client = new TextAnalyticsClient(new ApiKeyServiceClientCredentialsForText())
            {
                Endpoint = TextAnalyticsEndPoint
            }; //Replace 'westus' with the correct region for your Text Analytics subscription

            Console.OutputEncoding = System.Text.Encoding.UTF8;

            // Extracting language
            log.Info("===== Text Analytics Started ======");
            string content = DocumentExtraction.GetTextFromPDF(pdfReader);

            List<string> splittedList = StringExtensions.Split(content, MaxLengthofCharacters).ToList();

            // NOTE: the service calls below block on .Result because this method's
            // synchronous signature is part of its contract with callers.
            string detectedLanguage = null;
            if (splittedList.Count > 0)
            {
                // Language is detected from the first chunk only and reused for all chunks.
                var languageResult = client.DetectLanguageAsync(new BatchInput(
                    new List<Input>()
                    {
                        new Input(resumeDocModel.DocumentName, splittedList[0])
                    })).Result;

                resumeDocModel.languageBatchResult = languageResult.Documents.FirstOrDefault();

                // Tolerate a response without any detected language instead of indexing [0].
                detectedLanguage = languageResult.Documents
                    .Select(doc => doc.DetectedLanguages?.FirstOrDefault()?.Iso6391Name)
                    .FirstOrDefault();
            }

            //SentimentBatchResult result3 = client.SentimentAsync(
            //   new MultiLanguageBatchInput(
            //       new List<MultiLanguageInput>()
            //       {
            //              new MultiLanguageInput(detectedLanguage, resumeDocModel.DocumentName, splittedList.First())
            //       })).Result;

            //resumeDocModel.sentimentBatchResult = result3.Documents.FirstOrDefault();

            List<string>             keyPhraseList = new List<string>();
            List<EntityRecordV2dot1> entityRecords = new List<EntityRecordV2dot1>();

            foreach (string splittedContent in splittedList)
            {
                KeyPhraseBatchResult keyPhraseBatch = client.KeyPhrasesAsync(new MultiLanguageBatchInput(
                    new List<MultiLanguageInput>()
                    {
                        new MultiLanguageInput(detectedLanguage, resumeDocModel.DocumentName, splittedContent)
                    })).Result;

                foreach (var doc in keyPhraseBatch.Documents)
                {
                    keyPhraseList.AddRange(doc.KeyPhrases);
                }

                EntitiesBatchResultV2dot1 entitiesbatchres = client.EntitiesAsync(
                    new MultiLanguageBatchInput(
                        new List<MultiLanguageInput>()
                        {
                            new MultiLanguageInput(detectedLanguage, resumeDocModel.DocumentName, splittedContent)
                        })).Result;

                // Iterate instead of Documents.First(): a chunk for which the service
                // returns no document must not abort the whole analysis, matching how
                // the key-phrase results are handled above.
                foreach (var doc in entitiesbatchres.Documents)
                {
                    entityRecords.AddRange(doc.Entities);
                }
            }

            resumeDocModel.keyPhraseBatchResult.Id         = resumeDocModel.DocumentName;
            resumeDocModel.keyPhraseBatchResult.KeyPhrases = keyPhraseList;

            resumeDocModel.entityBatchResult.Id            = resumeDocModel.DocumentName;
            resumeDocModel.entityBatchResult.EntityRecords = entityRecords;

            log.Info("===== Text Analytics Completed ======");
        }
        /// <summary>
        /// Deletes the block blobs previously stored under <paramref name="name"/>, then
        /// re-encodes every image found on every page of the PDF as JPEG (quality 90) and
        /// uploads it as "<paramref name="name"/>/{guid}.jpeg". The reader is always closed
        /// before returning, including on failure.
        /// </summary>
        /// <param name="pdfReader">Open reader over the PDF; closed by this method.</param>
        /// <param name="blob">Source blob stream. Not read here; kept for signature compatibility.</param>
        /// <param name="log">Function trace writer (currently unused).</param>
        /// <param name="name">Document name used as the virtual folder for the images.</param>
        /// <param name="resumeDocModel">Receives the URL of each uploaded image in imageDetails.imageURLList.</param>
        /// <returns>The uploaded image blobs, in upload order; empty when the PDF has no images.</returns>
        public static List<CloudBlockBlob> ExtractImageUploadToAzure(PdfReader pdfReader, Stream blob, TraceWriter log, string name, ResumeDocModel resumeDocModel)
        {
            List<CloudBlockBlob> cloudBlockBlobs = new List<CloudBlockBlob>();

            try
            {
                AzureStorageModel azureStorageModel = AzStorage.GetBlobDirectoryList(name);

                // Remove images left over from an earlier upload of the same document.
                // Listing entries that are not block blobs are skipped rather than
                // dereferenced as null.
                foreach (IListBlobItem listBlobItem in azureStorageModel.IblobList)
                {
                    CloudBlockBlob staleBlob = listBlobItem as CloudBlockBlob;
                    if (staleBlob != null)
                    {
                        staleBlob.DeleteIfExists();
                    }
                }

                // The encoder configuration is identical for every image, so build it once.
                ImageCodecInfo encoder = ImageCodecInfo.GetImageEncoders().First(c => c.FormatID == ImageFormat.Jpeg.Guid);
                using (EncoderParameters parms = new EncoderParameters(1))
                {
                    parms.Param[0] = new EncoderParameter(System.Drawing.Imaging.Encoder.Quality, 90L);

                    for (int pageNumber = 1; pageNumber <= pdfReader.NumberOfPages; pageNumber++)
                    {
                        PdfDictionary pg         = pdfReader.GetPageN(pageNumber);
                        List<Image>   pageImages = GetImagesFromPdfDict(pg, pdfReader);

                        foreach (Image image in pageImages)
                        {
                            // Dispose each image and its buffer as soon as it is uploaded.
                            using (image)
                            using (MemoryStream ms = new MemoryStream())
                            {
                                CloudBlockBlob cloudBlob = azureStorageModel.Container.GetBlockBlobReference(name + "/" + Guid.NewGuid() + ".jpeg");
                                image.Save(ms, encoder, parms);
                                ms.Seek(0, SeekOrigin.Begin); // otherwise you'll get zero byte files
                                cloudBlob.UploadFromStream(ms);
                                cloudBlockBlobs.Add(cloudBlob);
                                resumeDocModel.imageDetails.imageURLList.Add(cloudBlob.Uri.ToString());
                            }
                        }
                    }
                }
            }
            finally
            {
                pdfReader.Close();
            }

            return cloudBlockBlobs;
        }