/// <summary>
/// Processes one blob end to end: extracts annotations from the document,
/// saves them through <c>cosmosDb</c>, and then merges/uploads a search
/// document built from those annotations through <c>indexClient</c>.
/// </summary>
/// <param name="blobStream">Stream holding the document to parse.</param>
/// <param name="name">Blob name; passed to the <c>SearchDocument</c> constructor and used in log messages.</param>
/// <param name="log">Writer that receives progress and indexing-failure messages.</param>
/// <returns>A task that completes once the indexing call has returned and any failures have been logged.</returns>
public static async Task Run(Stream blobStream, string name, TraceWriter log)
{
    log.Info($"Processing blob:{name}");

    // Parse the document and apply the skill set through the shared helper so
    // the pipeline is defined in exactly one place.
    // The result is materialized once because it is consumed three times below;
    // if the skill set yields a deferred sequence, re-enumerating it would
    // re-run the pipeline each time.
    List<Annotation> annotations = (await ProcessDocument(blobStream)).ToList();

    // Commit them to Cosmos DB to be used by full corpus skills such as Topics.
    await cosmosDb.SaveAsync(annotations);

    // Index the annotated document with Azure Search.
    AnnotatedDocument document = new AnnotatedDocument(
        annotations.Select(a => a.Get<AnnotatedPage>("page-metadata")));

    var searchDocument = new SearchDocument(name)
    {
        Metadata = document.Metadata,
        Text = document.Text,

        // Distinct entity names, ordered by the best score seen for each name.
        LinkedEntities = annotations
            .SelectMany(a => a.Get<EntityLink[]>("linked-entities") ?? new EntityLink[0])
            .GroupBy(l => l.Name)
            .OrderByDescending(g => g.Max(l => l.Score))
            .Select(g => g.Key)
            .ToList(),
    };

    var batch = IndexBatch.MergeOrUpload(new[] { searchDocument });
    var result = await indexClient.Documents.IndexAsync(batch);

    // Report every failed entry rather than assuming Results[0] exists.
    // Failures are logged, not thrown, matching the original best-effort behavior.
    foreach (var failure in result.Results.Where(r => !r.Succeeded))
    {
        log.Error($"index failed for {name}: {failure.ErrorMessage}");
    }
}
/// <summary>
/// Parses the supplied document into page images and applies the cognitive
/// skill set to them.
/// </summary>
/// <param name="blobStream">Stream holding the document to parse.</param>
/// <returns>The annotations produced by applying the skill set to the parsed pages.</returns>
private static async Task<IEnumerable<Annotation>> ProcessDocument(Stream blobStream)
{
    // Extract the page images from the document.
    IEnumerable<PageImage> extractedPages = DocumentParser.Parse(blobStream).Pages;

    // Build the skill set and run it over the extracted pages.
    SkillSet<PageImage> cognitiveSkills = CreateCognitiveSkillSet();
    return await cognitiveSkills.ApplyAsync(extractedPages);
}