/// <summary>
/// Creates the factory and stores the supplied extractors in the
/// corresponding instance fields.
/// </summary>
/// <param name="manifestExtractor">Extractor kept in <c>manifestExtractor</c>.</param>
/// <param name="metaExtractor">Extractor kept in <c>metaExtractor</c>.</param>
/// <param name="fileExtractor">Extractor kept in <c>fileExtractor</c>.</param>
/// <param name="navigationExtractor">Extractor kept in <c>navigationExtractor</c>.</param>
public ModelBuilderFactory(
    IManifestExtractor manifestExtractor,
    IMetaExtractor metaExtractor,
    IFileExtractor fileExtractor,
    INavigationExtractor navigationExtractor)
{
    // Parameters shadow the fields, so the "this." qualifier is required.
    this.navigationExtractor = navigationExtractor;
    this.fileExtractor = fileExtractor;
    this.metaExtractor = metaExtractor;
    this.manifestExtractor = manifestExtractor;
}
/// <summary>
/// Handles an <c>ExtractMeta</c> message: loads the referenced blob, picks a
/// meta extractor from the blob's file extension, runs it against a temporary
/// copy of the content and publishes the outcome.
/// </summary>
/// <param name="context">Consume context carrying the <c>ExtractMeta</c> message.</param>
/// <returns>A task that completes once exactly one result event has been published.</returns>
/// <remarks>
/// Publishes <c>MetaExtracted</c> on success. Publishes <c>MetaExtractionFailed</c>
/// when no extractor matches the file extension or when any exception is thrown
/// while loading or processing the blob.
/// </remarks>
public async Task Consume(ConsumeContext<ExtractMeta> context)
{
    try
    {
        var blob = await blobStorage.GetFileAsync(context.Message.BlobId, context.Message.Bucket);

        // Pick the extractor before touching the disk so unsupported files never create a temp copy.
        IMetaExtractor extractor = CreateMetaExtractor(blob.Info.FileName);
        if (extractor == null)
        {
            await context.Publish<MetaExtractionFailed>(new
            {
                Id = context.Message.Id,
                UserId = context.Message.UserId,
                TimeStamp = DateTimeOffset.UtcNow,
                CorrelationId = context.Message.CorrelationId,
                Message = $"Cannot find file converter for {blob.Info.FileName}"
            });

            // Stop here: a failed extraction must not also be reported as MetaExtracted.
            return;
        }

        var tempFilePath = Path.GetTempFileName();

        // A single DeleteOnClose stream is used for both writing and reading, so the temp file
        // is removed when the stream is disposed, including when the copy or extraction throws.
        using (var fs = new FileStream(tempFilePath, FileMode.Open, FileAccess.ReadWrite, FileShare.None, 4096, FileOptions.RandomAccess | FileOptions.DeleteOnClose))
        {
            using (var content = blob.GetContentAsStream())
            {
                await content.CopyToAsync(fs);
            }

            // Rewind so the extractor reads the content from the beginning.
            fs.Position = 0;

            IEnumerable<Property> meta = extractor.GetMeta(fs);

            // Publish while the stream is still open in case the extractor result is enumerated lazily.
            await context.Publish<MetaExtracted>(new
            {
                Bucket = context.Message.Bucket,
                BlobId = context.Message.BlobId,
                Meta = meta,
                Id = context.Message.Id,
                UserId = context.Message.UserId,
                TimeStamp = DateTimeOffset.UtcNow,
                CorrelationId = context.Message.CorrelationId
            });
        }
    }
    catch (Exception e)
    {
        // Catch-all boundary: any failure is reported as an event instead of propagating.
        await context.Publish<MetaExtractionFailed>(new
        {
            Id = context.Message.Id,
            UserId = context.Message.UserId,
            TimeStamp = DateTimeOffset.UtcNow,
            CorrelationId = context.Message.CorrelationId,
            Message = $"Cannot extract meta from file in bucket {context.Message.Bucket} with Id {context.Message.BlobId}. Error: {e.Message}"
        });
    }
}

/// <summary>
/// Maps a file name to the meta extractor for its extension (case-insensitive).
/// </summary>
/// <param name="fileName">File name whose extension selects the extractor; may be null.</param>
/// <returns>A new extractor instance, or <c>null</c> when the extension is not supported.</returns>
private static IMetaExtractor CreateMetaExtractor(string fileName)
{
    // Invariant lower-casing keeps the match independent of the current culture.
    var extension = Path.GetExtension(fileName)?.ToLowerInvariant();

    switch (extension)
    {
        case ".doc":
        case ".docx":
        case ".odt":
            return new DocMetaExtractor();

        case ".xls":
        case ".xlsx":
        case ".ods":
            return new ExcelMetaExtractor();

        case ".ppt":
        case ".pptx":
        case ".odp":
            return new PresentationMetaExtractor();

        default:
            return null;
    }
}
/// <summary>
/// Creates the processor and stores the supplied extractors in the
/// corresponding private fields.
/// </summary>
/// <param name="linkExtractor">Extractor kept in <c>_linkExtractor</c>.</param>
/// <param name="htmlExtractor">Extractor kept in <c>_htmlExtractor</c>.</param>
/// <param name="metaExtractor">Extractor kept in <c>_metaExtractor</c>.</param>
public TextProcessor(
    ILinkExtractor linkExtractor,
    IHtmlExtractor htmlExtractor,
    IMetaExtractor metaExtractor)
{
    this._metaExtractor = metaExtractor;
    this._htmlExtractor = htmlExtractor;
    this._linkExtractor = linkExtractor;
}