public static string Extract(ReadOnlyMemory <byte> bytes) { if (bytes.Length < 6) { throw new UnsupportedFileException("Unable to indentify content type of file"); } var hex = HexBuilder.Build(bytes.Slice(0, 6)); // Check if its an openxml file. if (hex.StartsWith("504B0304") || hex.StartsWith("504B0506") || hex.StartsWith("504B0708")) { using (var ms = new MemoryStream(bytes.GetUnderlyingArray().Array)) using (var package = Package.Open(ms)) { if (package.PartExists(new Uri("/word/document.xml", UriKind.Relative))) { return(ContentType.Application.Docx); } else if (package.PartExists(new Uri("/xl/workbook.xml", UriKind.Relative))) { return(ContentType.Application.Xlsx); } else if (package.PartExists(new Uri("/ppt/presentation.xml", UriKind.Relative))) { return(ContentType.Application.Pptx); } else { throw new UnsupportedFileException("Unable to indentify content type of file"); } } } if (!HexMimeMappings.TryGetValue(hex, out var type)) { throw new UnsupportedFileException("Unable to indentify content type of file"); } return(type); }
private TextExtractor(ReadOnlyMemory <byte> bytes) { _stream = new MemoryStream(bytes.GetUnderlyingArray().Array); _contentType = ContentTypeExtractor.Extract(bytes); _disposeStream = true; }