/// <summary>
/// Blob-triggered entry point: analyses a document uploaded to <c>resumeuploadcontainer</c>
/// and persists the analysis result through <c>CosmosDB.UpdInsResumeDocumentAsync</c>.
/// </summary>
/// <remarks>
/// For a blob whose extension passes <c>IsValidExtension</c> the function:
/// runs text analytics on the PDF, classifies the document as <c>DocType.Resume</c> when any
/// extracted key phrase contains one of the comma-separated <c>ResumeKeyWords</c> (case-insensitive),
/// otherwise <c>DocType.Others</c>; extracts the embedded images and runs computer-vision analysis
/// on each of them; and finally creates or replaces the stored document.
/// Blobs with any other extension are only logged.
/// </remarks>
/// <param name="myBlob">Content stream of the uploaded blob.</param>
/// <param name="name">Blob name captured from the trigger path.</param>
/// <param name="uri">URI of the uploaded blob; its absolute form is stored as <c>DocumentUri</c>.</param>
/// <param name="log">Trace writer used for progress messages.</param>
public static async Task RunAsync([BlobTrigger("resumeuploadcontainer/{name}", Connection = "AzureWebJobsStorage")] Stream myBlob, string name, Uri uri, TraceWriter log)
{
    log.Info($"C# Blob trigger function Processed blob\n Name:{name} \n Size: {myBlob.Length} Bytes");

    // Split on the LAST dot only, so "john.doe.pdf" keeps the document name "john.doe"
    // instead of being truncated to "john" (which made different uploads collide).
    // A name without any dot is used as both the name and the "extension", as before.
    int extensionSeparator = name.LastIndexOf('.');
    string extension = extensionSeparator >= 0 ? name.Substring(extensionSeparator + 1) : name;
    string filenamewithoutextension = extensionSeparator >= 0 ? name.Substring(0, extensionSeparator) : name;

    if (!IsValidExtension(extension))
    {
        log.Info("Please upload the valid Document in " + string.Join(",", _validExtensions) + " Extension");
        return;
    }

    // NOTE: the reader is closed by DocumentExtraction.ExtractImageUploadToAzure below.
    PdfReader pdfReader = new PdfReader(myBlob);
    ResumeDocModel resumeDocModel = new ResumeDocModel
    {
        DocumentName = filenamewithoutextension,
        DocumentUri = uri.AbsoluteUri
    };

    CognitiveTextAnalytics.TextAnalytics(pdfReader, log, resumeDocModel);

    // Empty entries (e.g. a trailing comma in the setting) are dropped: an empty keyword
    // is "contained" in every phrase and would classify every document as a resume.
    string[] resumeKeysArray = ResumeKeyWords.Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);
    bool isResumeDoc = resumeKeysArray.Any(keyword =>
        resumeDocModel.keyPhraseBatchResult.KeyPhrases.Any(
            phrase => phrase.IndexOf(keyword, StringComparison.OrdinalIgnoreCase) >= 0));
    resumeDocModel.docType = isResumeDoc ? DocType.Resume : DocType.Others;

    List<CloudBlockBlob> cloudBlocks = DocumentExtraction.ExtractImageUploadToAzure(pdfReader, myBlob, log, filenamewithoutextension, resumeDocModel);
    if (cloudBlocks.Count > 0)
    {
        log.Info("===== Computer Vision Analysis Started ======");
        ImageBatchResult imageBatchResult = new ImageBatchResult();
        foreach (CloudBlockBlob cloudBlockBlob in cloudBlocks)
        {
            string blobUrlWithSAS = AzStorage.GetBlobSasUri(cloudBlockBlob);
            await CognitiveComputerVision.VisionAnalyticsAsync(log, blobUrlWithSAS, imageBatchResult);
        }

        resumeDocModel.imageBatchResult = imageBatchResult;
        log.Info("===== Computer Vision Analysis Completed ======");
    }
    else
    {
        log.Info("The Document doesn't not have any Images to analyze");
    }

    CosmosDB cosmosDB = new CosmosDB();

    // Awaited instead of .Wait(): blocking inside an async function ties up a thread
    // and wraps failures in AggregateException.
    await cosmosDB.CreateDocumentDB();
    await cosmosDB.UpdInsResumeDocumentAsync(resumeDocModel);
}
/// <summary>
/// Creates the given resume document, or replaces the stored one that has the same
/// <c>DocumentName</c>.
/// </summary>
/// <remarks>
/// When a stored document with the same name exists, its <c>id</c> is copied onto
/// <paramref name="resumeDoc"/> before the replace, so the caller's instance is mutated.
/// </remarks>
/// <param name="resumeDoc">Document to create or replace.</param>
/// <exception cref="ArgumentNullException"><paramref name="resumeDoc"/> is <c>null</c>.</exception>
public async Task UpdInsResumeDocumentAsync(ResumeDocModel resumeDoc)
{
    if (resumeDoc == null)
    {
        throw new ArgumentNullException(nameof(resumeDoc));
    }

    FeedOptions queryOptions = new FeedOptions { MaxItemCount = 1, EnableCrossPartitionQuery = true };

    // The document name originates from an uploaded blob name, so it is passed as a query
    // parameter rather than concatenated into the SQL text (a quote in the name would
    // otherwise break or alter the query).
    var lookupByName = new Microsoft.Azure.Documents.SqlQuerySpec(
        "SELECT * FROM Resume WHERE Resume.DocumentName = @documentName",
        new Microsoft.Azure.Documents.SqlParameterCollection
        {
            new Microsoft.Azure.Documents.SqlParameter("@documentName", resumeDoc.DocumentName)
        });

    List<ResumeDocModel> resumeQueryResult = this.client.CreateDocumentQuery<ResumeDocModel>(
        UriFactory.CreateDocumentCollectionUri(DatabaseId, CollectionId),
        lookupByName,
        queryOptions).ToList();

    if (resumeQueryResult.Count == 0)
    {
        await this.client.CreateDocumentAsync(UriFactory.CreateDocumentCollectionUri(DatabaseId, CollectionId), resumeDoc);
        return;
    }

    // Reuse the id of the first match so the replace targets the stored document.
    ResumeDocModel existing = resumeQueryResult[0];
    resumeDoc.id = existing.id;
    await this.client.ReplaceDocumentAsync(UriFactory.CreateDocumentUri(DatabaseId, CollectionId, existing.id), resumeDoc);
}
/// <summary>
/// Extracts the text of a PDF and fills <paramref name="resumeDocModel"/> with the detected
/// language, the key phrases and the entity records returned by the Text Analytics service.
/// </summary>
/// <remarks>
/// The text is split with <c>StringExtensions.Split(content, MaxLengthofCharacters)</c> and each
/// piece is sent separately; the language is detected from the first piece only and reused for
/// all key-phrase and entity requests. Sentiment analysis is not performed;
/// <c>sentimentBatchResult</c> is left untouched.
/// When no text piece is produced (for example an image-only PDF) no service call is made and
/// empty key-phrase / entity lists are stored.
/// The method is synchronous by signature, so it blocks on the SDK's asynchronous calls.
/// </remarks>
/// <param name="pdfReader">Open reader for the PDF to analyse.</param>
/// <param name="log">Trace writer used for progress messages.</param>
/// <param name="resumeDocModel">
/// Model to populate; its <c>DocumentName</c> is used as the request/document id and its
/// <c>keyPhraseBatchResult</c> and <c>entityBatchResult</c> members are written to, so they
/// must already be instantiated.
/// </param>
public static void TextAnalytics(PdfReader pdfReader, TraceWriter log, ResumeDocModel resumeDocModel)
{
    // Create a client.
    ITextAnalyticsClient client = new TextAnalyticsClient(new ApiKeyServiceClientCredentialsForText())
    {
        Endpoint = TextAnalyticsEndPoint
    };

    Console.OutputEncoding = System.Text.Encoding.UTF8;

    log.Info("===== Text Analytics Started ======");
    string content = DocumentExtraction.GetTextFromPDF(pdfReader);
    List<string> splittedList = StringExtensions.Split(content, MaxLengthofCharacters).ToList();

    List<string> keyPhraseList = new List<string>();
    List<EntityRecordV2dot1> entityRecords = new List<EntityRecordV2dot1>();

    if (splittedList.Count == 0)
    {
        // Previously this case crashed on splittedList.First().
        log.Info("No text could be extracted from the document; skipping language, key phrase and entity analysis");
    }
    else
    {
        // Extracting language (from the first piece only).
        var languageResult = client.DetectLanguageAsync(new BatchInput(
            new List<Input>()
            {
                new Input(resumeDocModel.DocumentName, splittedList[0])
            })).Result;
        resumeDocModel.languageBatchResult = languageResult.Documents.FirstOrDefault();

        // Null-safe: the service may return a document without any detected language.
        var detectedLanguage = languageResult.Documents
            .Select(doc => doc.DetectedLanguages?.FirstOrDefault()?.Iso6391Name)
            .FirstOrDefault();

        foreach (string splittedContent in splittedList)
        {
            KeyPhraseBatchResult keyPhraseBatch = client.KeyPhrasesAsync(new MultiLanguageBatchInput(
                new List<MultiLanguageInput>()
                {
                    new MultiLanguageInput(detectedLanguage, resumeDocModel.DocumentName, splittedContent)
                })).Result;
            foreach (var doc in keyPhraseBatch.Documents)
            {
                if (doc.KeyPhrases != null)
                {
                    keyPhraseList.AddRange(doc.KeyPhrases);
                }
            }

            EntitiesBatchResultV2dot1 entitiesBatch = client.EntitiesAsync(new MultiLanguageBatchInput(
                new List<MultiLanguageInput>()
                {
                    new MultiLanguageInput(detectedLanguage, resumeDocModel.DocumentName, splittedContent)
                })).Result;

            // Iterate like the key phrases above instead of Documents.First(), which threw
            // when the service returned no document for this piece.
            foreach (var doc in entitiesBatch.Documents)
            {
                if (doc.Entities != null)
                {
                    entityRecords.AddRange(doc.Entities);
                }
            }
        }
    }

    resumeDocModel.keyPhraseBatchResult.Id = resumeDocModel.DocumentName;
    resumeDocModel.keyPhraseBatchResult.KeyPhrases = keyPhraseList;
    resumeDocModel.entityBatchResult.Id = resumeDocModel.DocumentName;
    resumeDocModel.entityBatchResult.EntityRecords = entityRecords;
    log.Info("===== Text Analytics Completed ======");
}
/// <summary>
/// Extracts every image embedded in the PDF, re-encodes it as JPEG (quality 90) and uploads it
/// to blob storage under the virtual folder <c>{name}/</c>.
/// </summary>
/// <remarks>
/// Block blobs already listed for <paramref name="name"/> by <c>AzStorage.GetBlobDirectoryList</c>
/// are deleted first. The URI of each uploaded image is appended to
/// <c>resumeDocModel.imageDetails.imageURLList</c>.
/// <paramref name="pdfReader"/> is always closed before this method returns or throws.
/// </remarks>
/// <param name="pdfReader">Open reader for the PDF; closed by this method.</param>
/// <param name="blob">Stream of the source blob (not read by this method; kept for signature compatibility).</param>
/// <param name="log">Trace writer (currently unused by this method).</param>
/// <param name="name">Document name used as the blob folder prefix.</param>
/// <param name="resumeDocModel">Model that receives the uploaded image URLs.</param>
/// <returns>The uploaded image blobs, in page order; empty when the PDF contains no images.</returns>
public static List<CloudBlockBlob> ExtractImageUploadToAzure(PdfReader pdfReader, Stream blob, TraceWriter log, string name, ResumeDocModel resumeDocModel)
{
    List<CloudBlockBlob> cloudBlockBlobs = new List<CloudBlockBlob>();
    try
    {
        AzureStorageModel azureStorageModel = AzStorage.GetBlobDirectoryList(name);

        // Remove images left over from a previous upload of the same document.
        // OfType skips listing entries that are not block blobs (the former
        // "as CloudBlockBlob" cast dereferenced null for those).
        foreach (CloudBlockBlob staleBlob in azureStorageModel.IblobList.OfType<CloudBlockBlob>())
        {
            staleBlob.DeleteIfExists();
        }

        // Encoder settings do not depend on the page, so they are built once.
        ImageCodecInfo jpegEncoder = ImageCodecInfo.GetImageEncoders().First(c => c.FormatID == ImageFormat.Jpeg.Guid);
        using (EncoderParameters encoderParameters = new EncoderParameters(1))
        {
            encoderParameters.Param[0] = new EncoderParameter(System.Drawing.Imaging.Encoder.Quality, 90L);

            for (int pageNumber = 1; pageNumber <= pdfReader.NumberOfPages; pageNumber++)
            {
                PdfDictionary page = pdfReader.GetPageN(pageNumber);
                List<Image> pageImages = GetImagesFromPdfDict(page, pdfReader);

                foreach (Image image in pageImages)
                {
                    // Both the image and the buffer hold native/managed resources; release
                    // them as soon as the upload is done.
                    using (image)
                    using (MemoryStream buffer = new MemoryStream())
                    {
                        CloudBlockBlob cloudBlob = azureStorageModel.Container.GetBlockBlobReference(name + "/" + Guid.NewGuid() + ".jpeg");
                        image.Save(buffer, jpegEncoder, encoderParameters);
                        buffer.Seek(0, SeekOrigin.Begin); // otherwise you'll get zero byte files
                        cloudBlob.UploadFromStream(buffer);

                        cloudBlockBlobs.Add(cloudBlob);
                        resumeDocModel.imageDetails.imageURLList.Add(cloudBlob.Uri.ToString());
                    }
                }
            }
        }
    }
    finally
    {
        pdfReader.Close();
    }

    return cloudBlockBlobs;
}