/// <summary>
/// Copies the file that backs a blob into <paramref name="folder"/> and returns the path of the copy.
/// </summary>
/// <param name="blobId">Identifier of the blob to copy; must not be null.</param>
/// <param name="folder">Destination directory; must be non-empty and already exist.</param>
/// <returns>
/// Path of the copied file. It is the descriptor's original file name inside <paramref name="folder"/>,
/// with a " (n)" suffix added before the extension when a file of that name is already present.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="blobId"/> is null or <paramref name="folder"/> is null/empty.</exception>
/// <exception cref="ArgumentException">
/// The folder does not exist, no descriptor is stored for the blob, or the blob file is missing on disk.
/// </exception>
public string Download(BlobId blobId, string folder)
{
    if (blobId == null)
    {
        throw new ArgumentNullException(nameof(blobId));
    }

    if (String.IsNullOrEmpty(folder))
    {
        throw new ArgumentNullException(nameof(folder));
    }

    if (!Directory.Exists(folder))
    {
        throw new ArgumentException($"folder {folder} does not exists", nameof(folder));
    }

    var blobDescriptor = _mongodDbFileSystemBlobDescriptorStorage.FindOneById(blobId);
    if (blobDescriptor == null)
    {
        throw new ArgumentException($"Descriptor for {blobId} not found in {_mongodDbFileSystemBlobDescriptorStorage.GetType().Name}");
    }

    // The descriptor exists: make sure the physical file is still where the directory manager says it is.
    var sourcePath = _directoryManager.GetFileNameFromBlobId(blobId, blobDescriptor.FileNameWithExtension);
    if (!File.Exists(sourcePath))
    {
        Logger.Error($"Blob {blobId} has descriptor, but blob file {sourcePath} not found in the system.");
        throw new ArgumentException($"Blob {blobId} not found");
    }

    var sourceName = blobDescriptor.FileNameWithExtension.ToString();
    string targetPath = Path.Combine(folder, sourceName);

    // Probe "name (1).ext", "name (2).ext", ... until a free name is found.
    for (Int32 counter = 1; File.Exists(targetPath); counter++)
    {
        var numberedName = Path.GetFileNameWithoutExtension(sourceName) + $" ({counter})";
        targetPath = Path.Combine(folder, numberedName) + Path.GetExtension(sourceName);
    }

    File.Copy(sourcePath, targetPath);

    if (Logger.IsDebugEnabled)
    {
        Logger.Debug($"Blob {blobId} downloaded in folder {folder} with name {targetPath}");
    }

    return targetPath;
}
/// <summary>
/// Searches the processes on this machine that share the executable's name (without extension)
/// and returns the first one whose command line contains <c>/queue:{queueId}</c>.
/// </summary>
/// <param name="queueId">Queue identifier expected after the <c>/queue:</c> switch.</param>
/// <param name="executableName">Executable file name or path; only the name without extension is used.</param>
/// <returns>The matching process (owned by the caller), or null when none matches.</returns>
/// <remarks>
/// The match is a plain substring test, so a queue id that is a prefix of another
/// (e.g. "1" and "10") matches both.
/// </remarks>
private Process GetLocalProcessForQueue(String queueId, String executableName)
{
    var processFileName = Path.GetFileNameWithoutExtension(executableName);
    var candidates = Process.GetProcessesByName(processFileName, Environment.MachineName);
    var queueSwitch = "/queue:" + queueId;

    Process match = null;
    foreach (Process candidate in candidates)
    {
        if (match == null)
        {
            var cmdLine = GetCommandLine(candidate);

            // A missing command line is treated as "no match" instead of crashing the lookup.
            if (cmdLine != null && cmdLine.Contains(queueSwitch))
            {
                match = candidate;
                continue;
            }
        }

        // Every Process instance holds OS resources; release the ones that are not returned.
        candidate.Dispose();
    }

    return match;
}
/// <summary>
/// Downloads the job's blob, produces one resized image per requested size and
/// attaches each of them to the document as a "thumb.{size name}" format.
/// </summary>
/// <param name="parameters">
/// Job parameters; <c>JobKeys.ThumbnailFormat</c> and <c>JobKeys.Sizes</c> are read from
/// <c>parameters.All</c>.
/// </param>
/// <param name="workingFolder">Folder where the blob is downloaded and the resized images are written.</param>
/// <returns><c>ProcessResult.Ok</c> once every size has been processed.</returns>
protected async override Task<ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
{
    var fileExtension = parameters.All[JobKeys.ThumbnailFormat];
    ImageFormat format = GetFormatFromExtension(fileExtension);
    var sizesAsString = parameters.All[JobKeys.Sizes];
    var imageSizes = SizeInfoHelper.Deserialize(sizesAsString);

    Logger.DebugFormat("Starting resize job for {0} - {1}", parameters.JobId, sizesAsString);

    string pathToFile = await DownloadBlob(parameters.TenantId, parameters.JobId, parameters.FileName, workingFolder);

    using (var pageStream = new MemoryStream())
    {
        // Buffer the source once; the file handle is released before the resize loop starts.
        using (var sourceStream = File.OpenRead(pathToFile))
        {
            sourceStream.CopyTo(pageStream);
        }

        foreach (var size in imageSizes)
        {
            Logger.DebugFormat("Resize job for {0} - {1}", parameters.JobId, size.Name);

            // Each size re-reads the buffered image from the start.
            pageStream.Seek(0, SeekOrigin.Begin);

            var fileFormat = new Client.Model.DocumentFormat("thumb." + size.Name);
            string resizeImagePath = Path.Combine(
                workingFolder,
                String.Format("{0}.{1}.{2}", Path.GetFileNameWithoutExtension(parameters.FileName), size.Name, fileExtension));
            resizeImagePath = SanitizeFileNameForLength(resizeImagePath);

            // File.Create truncates an existing file. File.OpenWrite would keep the old
            // length and leave stale trailing bytes if a previous, larger output exists.
            using (var outStream = File.Create(resizeImagePath))
            {
                Logger.DebugFormat("Resizing {0}", parameters.JobId);
                ImageResizer.Shrink(pageStream, outStream, size.Width, size.Height, format);
            }

            await AddFormatToDocumentFromFile(
                parameters.TenantId,
                parameters.JobId,
                fileFormat,
                resizeImagePath,
                new Dictionary<string, object>());
        }
    }

    Logger.DebugFormat("Ended resize job for {0} - {1}", parameters.JobId, sizesAsString);
    return ProcessResult.Ok;
}
/// <summary>
/// Sends the file at <paramref name="pathToFile"/> to the controller's Upload action as
/// multipart form data and returns the controller's response.
/// </summary>
/// <param name="pathToFile">Path of the file to upload.</param>
/// <param name="documentHandle">Handle value wrapped in a <c>DocumentHandle</c> for the upload.</param>
/// <returns>The response produced by <c>Controller.Upload</c>.</returns>
private async Task<HttpResponseMessage> upload_file(string pathToFile, string documentHandle)
{
    using (var fileStream = new FileStream(pathToFile, FileMode.Open))
    {
        var body = new MultipartFormDataContent("test");

        // Part name is the file name without extension; the file name keeps its extension.
        var filePart = new StreamContent(fileStream);
        var partName = Path.GetFileNameWithoutExtension(pathToFile);
        var partFileName = Path.GetFileName(pathToFile);
        body.Add(filePart, partName, partFileName);

        Controller.Request.Content = body;

        // Awaited inside the using block so the stream stays open for the whole upload.
        var handle = new DocumentHandle(documentHandle);
        var response = await Controller.Upload(_tenantId, handle);
        return response;
    }
}
/// <summary>
/// Saves one rendered page to <paramref name="workerFolder"/> and attaches it to the document
/// as a <c>DocumentFormats.RasterImage</c> format.
/// </summary>
/// <param name="workerFolder">Folder where the page file is written.</param>
/// <param name="parameters">Job parameters (file name, tenant id and job id are used).</param>
/// <param name="format">File extension (without dot) used for the page file.</param>
/// <param name="pageIndex">Page number embedded in the file name as ".page_{pageIndex}".</param>
/// <param name="stream">Page content; copied from its current position.</param>
/// <returns>Always true; failures surface as exceptions.</returns>
public async Task<Boolean> Write(String workerFolder, PollerJobParameters parameters, String format, int pageIndex, Stream stream)
{
    var rawFileName = Path.Combine(
        workerFolder,
        Path.GetFileNameWithoutExtension(parameters.FileName) + ".page_" + pageIndex + "." + format);
    rawFileName = SanitizeFileNameForLength(rawFileName);

    // File.Create truncates an existing file. File.OpenWrite would keep the old length
    // and leave stale trailing bytes if a previous, larger page file exists.
    using (var outStream = File.Create(rawFileName))
    {
        stream.CopyTo(outStream);
    }

    await AddFormatToDocumentFromFile(
        parameters.TenantId,
        parameters.JobId,
        new DocumentFormat(DocumentFormats.RasterImage),
        rawFileName,
        new Dictionary<string, object>()).ConfigureAwait(false);

    return true;
}
/// <summary>
/// Returns the path of the html file to process. If the input is already an unzipped html file
/// it is returned as is; otherwise the input is extracted as a zip archive into its own
/// directory and an html file is picked from that directory.
/// </summary>
/// <param name="jobId">Job identifier, used only in the error message.</param>
/// <returns>Path of the html file to use.</returns>
/// <exception cref="Exception">No file matching "*.htm*" is present after extraction.</exception>
string DownloadLocalCopy(String jobId)
{
    Logger.DebugFormat("Downloaded {0}", _inputFileName);

    if (IsUnzippedHtmlFile())
    {
        return _inputFileName;
    }

    var extractionFolder = Path.GetDirectoryName(_inputFileName);
    ZipFile.ExtractToDirectory(_inputFileName, extractionFolder);
    Logger.DebugFormat("Extracted zip to {0}", extractionFolder);

    var archiveBaseName = Path.GetFileNameWithoutExtension(_inputFileName);
    var candidates = Directory.GetFiles(extractionFolder, "*.htm*");

    if (candidates.Length == 0)
    {
        var msg = $"Html file not found for {jobId} {_inputFileName}";
        Logger.Error(msg);
        throw new Exception(msg);
    }

    // Default to the first html file; with several candidates prefer the one whose
    // name (without extension and trailing dots) equals the archive's base name.
    String chosen = candidates[0];
    if (candidates.Length > 1)
    {
        foreach (var candidate in candidates)
        {
            if (Path.GetFileNameWithoutExtension(candidate).TrimEnd('.') == archiveBaseName)
            {
                chosen = candidate;
                break;
            }
        }
    }

    Logger.Debug($"Extracted html from {_inputFileName} is {chosen}");
    return chosen;
}
/// <summary>
/// Splits a file name into <c>FileName</c> (name without extension) and <c>Extension</c>
/// (lower-cased, without the leading dot).
/// </summary>
/// <param name="fileNameWithExtension">
/// File name to split. Double quotes are removed before parsing. A name that starts with a
/// dot is kept whole as <c>FileName</c> with an empty <c>Extension</c>.
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="fileNameWithExtension"/> is null, empty or whitespace.
/// </exception>
public FileNameWithExtension(string fileNameWithExtension)
{
    if (String.IsNullOrWhiteSpace(fileNameWithExtension))
    {
        throw new ArgumentNullException(nameof(fileNameWithExtension));
    }

    fileNameWithExtension = fileNameWithExtension.Replace("\"", "");

    FileName = Path.GetFileNameWithoutExtension(fileNameWithExtension);
    Extension = Path.GetExtension(fileNameWithExtension);

    // Dot-files (".gitignore") have no extension: the whole string is the name.
    // Ordinal comparison: this is a structural check, not a linguistic one.
    if (fileNameWithExtension.StartsWith(".", StringComparison.Ordinal))
    {
        FileName = fileNameWithExtension;
        Extension = "";
    }

    if (!String.IsNullOrWhiteSpace(Extension))
    {
        // Drop the leading dot returned by Path.GetExtension and normalize casing.
        Extension = Extension.Remove(0, 1).ToLowerInvariant();
    }
}
/// <summary>
/// Extracts a mail message into a per-job folder, picks (or synthesizes) its html body,
/// rewrites the <c>src="..."</c> attributes through <c>NormalizeImgEvaluator</c>, then zips
/// the folder into "{html name}.ezip" inside <paramref name="workingFolder"/>.
/// </summary>
/// <param name="jobId">Job identifier; used as the name of the temporary extraction folder.</param>
/// <param name="pathToEml">Path of the message file to convert.</param>
/// <param name="workingFolder">Folder that hosts the extraction folder and the resulting zip.</param>
/// <returns>Path of the created ".ezip" file.</returns>
/// <remarks>
/// Body selection order: first ".htm" file, then first ".html" file, then the first ".txt" file
/// wrapped in a <c>&lt;pre&gt;</c> element, otherwise a placeholder "contentmissing.html".
/// The extraction folder is deleted after zipping.
/// </remarks>
public string Convert(String jobId, string pathToEml, string workingFolder)
{
    Logger.DebugFormat("Coverting {0} in working folder {1}", pathToEml, workingFolder);

    var reader = new Reader();
    var outFolder = Path.Combine(workingFolder, jobId);

    Logger.DebugFormat("Creating message working folder is {0}", outFolder);
    Directory.CreateDirectory(outFolder);

    Logger.Debug("Extracting files");
    var files = reader.ExtractToFolder(pathToEml, outFolder);

    if (Logger.IsDebugEnabled)
    {
        foreach (var file in files)
        {
            Logger.DebugFormat("\t{0}", Path.GetFileName(file));
        }
        Logger.DebugFormat("Total files {0}", files.Length);
    }

    var htmlFileName =
        files.FirstOrDefault(x => x.EndsWith(".htm", StringComparison.OrdinalIgnoreCase)) ??
        files.FirstOrDefault(x => x.EndsWith(".html", StringComparison.OrdinalIgnoreCase));

    if (htmlFileName == null)
    {
        var textFile = files.FirstOrDefault(x => x.EndsWith(".txt", StringComparison.OrdinalIgnoreCase));
        if (textFile != null)
        {
            htmlFileName = textFile + ".html";
            var textcontent = File.ReadAllText(textFile);

            // Plain text may contain '<', '>' or '&' (e.g. "Name <address>"); encode it so
            // it is rendered literally instead of being parsed as markup.
            File.WriteAllText(
                htmlFileName,
                String.Format("<html><body><pre>{0}</pre></body></html>", System.Net.WebUtility.HtmlEncode(textcontent)));
        }
        else
        {
            // The placeholder must live in the extraction folder: a bare relative name would
            // be written to the process working directory and would not end up in the zip.
            htmlFileName = Path.Combine(outFolder, "contentmissing.html");
            File.WriteAllText(htmlFileName, "<html>No content found in mail.</html>");
        }
    }

    var htmlNameWithoutExtension = Path.GetFileNameWithoutExtension(htmlFileName);
    var htmlContent = File.ReadAllText(htmlFileName);
    var dirInfoFullName = new DirectoryInfo(outFolder).FullName;

    htmlContent = Regex.Replace(
        htmlContent,
        @"src=""(?<src>.+?)""",
        new MatchEvaluator((m) => NormalizeImgEvaluator(m, dirInfoFullName)),
        RegexOptions.IgnoreCase);

    File.WriteAllText(htmlFileName, htmlContent);

    var pathToZip = Path.Combine(workingFolder, htmlNameWithoutExtension + ".ezip");
    Logger.DebugFormat("New zip file is {0}", pathToZip);

    if (File.Exists(pathToZip))
    {
        Logger.DebugFormat("Deleting previous file: {0}", pathToZip);
        File.Delete(pathToZip);
    }

    Logger.DebugFormat("Creating new file: {0}", pathToZip);
    ZipFile.CreateFromDirectory(outFolder, pathToZip);

    Logger.DebugFormat("Deleting message working folder {0}", outFolder);
    Directory.Delete(outFolder, true);

    Logger.DebugFormat(
        "Convesion done {0} => {1}",
        pathToEml,
        pathToZip
    );

    return pathToZip;
}
/// <summary>
/// Renders pages of a pdf to images and hands each one to <paramref name="pageWriter"/>.
/// When passwords are configured, the file is first decrypted to a temporary
/// "{name}_decrypted.pdf" next to the original; that copy is always removed before returning.
/// </summary>
/// <param name="pathToFile">Path of the pdf to render.</param>
/// <param name="createPdfImageTaskParams">Dpi, output format, first page and page count.</param>
/// <param name="pageWriter">
/// Callback invoked with the 1-based page number and a stream positioned at the start of the
/// rendered image. The stream is disposed after the callback completes.
/// </param>
/// <returns>Always true; failures surface as exceptions.</returns>
public async Task<Boolean> Run(
    String pathToFile,
    CreatePdfImageTaskParams createPdfImageTaskParams,
    Func<int, Stream, Task<Boolean>> pageWriter)
{
    String tempFileName = null;
    try
    {
        if (Passwords.Count > 0)
        {
            tempFileName = Path.Combine(
                Path.GetDirectoryName(pathToFile),
                Path.GetFileNameWithoutExtension(pathToFile) + "_decrypted.pdf");

            // Only switch to the decrypted copy when decryption reports success.
            if (Decryptor.DecryptFile(pathToFile, tempFileName, Passwords))
            {
                pathToFile = tempFileName;
            }
        }

        using (var sourceStream = File.OpenRead(pathToFile))
        {
            var settings = new MagickReadSettings
            {
                Density = new PointD(createPdfImageTaskParams.Dpi, createPdfImageTaskParams.Dpi)
            };
            settings.FrameIndex = 0; // First page
            settings.FrameCount = 1; // Number of pages

            MagickFormat imageFormat = TranslateFormat(createPdfImageTaskParams.Format);
            Logger.DebugFormat("Image format is {0}", imageFormat.ToString());

            using (var images = new MagickImageCollection())
            {
                // While _firstDone is false the read happens under a lock.
                // NOTE(review): _firstDone is never assigned in this method; unless it is set
                // elsewhere, every read stays serialized on this lock - confirm intent.
                bool done = false;
                if (!_firstDone)
                {
                    lock (LockForInitializationIssue)
                    {
                        if (!_firstDone)
                        {
                            images.Read(sourceStream, settings);
                            done = true;
                        }
                    }
                }

                if (!done)
                {
                    images.Read(sourceStream, settings);
                }

                // Clamp the requested range to the frames that were actually read.
                var lastImage = Math.Min(
                    createPdfImageTaskParams.FromPage - 1 + createPdfImageTaskParams.Pages,
                    images.Count) - 1;

                for (int page = createPdfImageTaskParams.FromPage - 1; page <= lastImage; page++)
                {
                    var image = images[page];
                    image.Format = imageFormat;

                    using (var ms = new MemoryStream())
                    {
                        image.Write(ms);
                        ms.Seek(0L, SeekOrigin.Begin);
                        await pageWriter(page + 1, ms).ConfigureAwait(false);
                    }
                }
            }
        }
    }
    finally
    {
        // Remove the decrypted copy even when rendering or the page writer throws,
        // so an unprotected version of the document is not left on disk.
        if (!String.IsNullOrEmpty(tempFileName) && File.Exists(tempFileName))
        {
            File.Delete(tempFileName);
        }
    }

    return true;
}
/// <summary>
/// Extracts the textual content of the job's file and attaches it to the document as the
/// <c>DocumentFormats.Content</c> format (when non-blank) and as a ".tika.html" file in the
/// <c>DocumentFormats.Tika</c> format.
/// </summary>
/// <param name="parameters">Job parameters (tenant, job id, file name and extension are used).</param>
/// <param name="workingFolder">Folder where the blob is downloaded and the tika file is written.</param>
/// <returns>
/// The result of <c>AddNullContentFormat</c> when the extension is not supported or the file is
/// filtered out; otherwise <c>ProcessResult.Ok</c>.
/// </returns>
/// <remarks>
/// Analyzers are tried in order (<c>BuildAnalyzer(0)</c>, <c>BuildAnalyzer(1)</c>, ...) until one
/// completes without throwing or <c>BuildAnalyzer</c> returns null. When passwords are available,
/// a failure with one password is logged and the next password is tried; if every password fails
/// the content stays empty and the analyzer is still considered successful.
/// </remarks>
protected async override Task<ProcessResult> OnPolling(
    PollerJobParameters parameters,
    String workingFolder)
{
    Boolean result;
    var contentFileName = Path.ChangeExtension(parameters.FileName, ".content");

    if (!_formats.Contains(parameters.FileExtension))
    {
        Logger.DebugFormat("Document for job Id {0} has an extension not supported, setting null content", parameters.JobId);
        return new ProcessResult(await AddNullContentFormat(parameters, contentFileName));
    }

    Logger.DebugFormat("Starting tika on job: {0}, file extension {1}", parameters.JobId, parameters.FileExtension);
    Logger.DebugFormat("Downloading blob for job: {0}, on local path {1}", parameters.JobId, workingFolder);

    string pathToFile = await DownloadBlob(parameters.TenantId, parameters.JobId, parameters.FileName, workingFolder);
    pathToFile = ProcessFile(pathToFile, workingFolder);

    Boolean shouldAnalyze = _filterManager.ShouldAnalyze(parameters.FileName, pathToFile);
    if (!shouldAnalyze)
    {
        Logger.InfoFormat("File {0} for job {1} was discharded!", parameters.FileName, parameters.JobId);
        return new ProcessResult(await AddNullContentFormat(parameters, contentFileName));
    }

    Logger.DebugFormat("Search for password JobId:{0}", parameters.JobId);
    var passwords = ClientPasswordSet.GetPasswordFor(parameters.FileName).ToArray();

    String content = "";
    Int32 analyzerOrdinal = 0;
    Boolean success = false;
    var analyzer = BuildAnalyzer(analyzerOrdinal);

    do
    {
        try
        {
            if (passwords.Any())
            {
                //Try with all the password
                foreach (var password in passwords)
                {
                    try
                    {
                        content = analyzer.GetHtmlContent(pathToFile, password) ?? "";
                        break; //first password that can decrypt file break the list of password to try
                    }
                    catch (Exception passwordEx)
                    {
                        // Keep the exception in the log: otherwise a wrong password and a
                        // genuine analyzer failure are indistinguishable.
                        Logger.ErrorFormat(passwordEx, "Error opening file {0} with password", parameters.FileName);
                    }
                }
            }
            else
            {
                //Simply analyze file without password
                Logger.DebugFormat("Analyze content JobId: {0} -> Path: {1}", parameters.JobId, pathToFile);
                content = analyzer.GetHtmlContent(pathToFile, "") ?? "";
            }

            success = true;
        }
        catch (Exception ex)
        {
            Logger.ErrorFormat(
                ex,
                "Error extracting tika with analyzer {0} on file {1} for job {2}",
                analyzer.Describe(),
                parameters.FileName,
                parameters.JobId);

            // Fall back to the next analyzer, if there is one.
            analyzer = BuildAnalyzer(++analyzerOrdinal);
            if (analyzer != null)
            {
                Logger.InfoFormat("Retry job {0} with analyzer {1}", parameters.JobId, analyzer.Describe());
            }
        }
    } while (analyzer != null && success == false);

    Logger.DebugFormat("Finished tika on job: {0}, charsNum {1}", parameters.JobId, content.Length);

    String sanitizedContent = content;
    if (!string.IsNullOrWhiteSpace(content))
    {
        var resultContent = _builder.CreateFromTikaPlain(content);
        var documentContent = resultContent.Content;
        sanitizedContent = resultContent.SanitizedTikaContent;

        // Language is detected on the second page when there is more than one page,
        // and on the only page when there is exactly one.
        var pages = documentContent.Pages.Count();
        string lang = null;
        if (pages > 1)
        {
            lang = LanguageDetector.GetLanguage(documentContent.Pages[1].Content);
        }

        if (lang == null && pages == 1)
        {
            lang = LanguageDetector.GetLanguage(documentContent.Pages[0].Content);
        }

        if (lang != null)
        {
            documentContent.AddMetadata(DocumentContent.MedatataLanguage, lang);
        }

        result = await AddFormatToDocumentFromObject(
            parameters.TenantId,
            this.QueueName,
            parameters.JobId,
            new DocumentFormat(DocumentFormats.Content),
            documentContent,
            contentFileName,
            new Dictionary<string, object>());

        Logger.DebugFormat("Added format {0} to jobId {1}, result: {2}", DocumentFormats.Content, parameters.JobId, result);
    }

    // The tika html file is written and attached even when the extracted content is empty.
    var tikaFileName = Path.Combine(workingFolder, Path.GetFileNameWithoutExtension(parameters.FileName) + ".tika.html");
    tikaFileName = SanitizeFileNameForLength(tikaFileName);
    File.WriteAllText(tikaFileName, sanitizedContent);

    result = await AddFormatToDocumentFromFile(
        parameters.TenantId,
        parameters.JobId,
        new DocumentFormat(DocumentFormats.Tika),
        tikaFileName,
        new Dictionary<string, object>());

    Logger.DebugFormat("Added format {0} to jobId {1}, result: {2}", DocumentFormats.Tika, parameters.JobId, result);
    return ProcessResult.Ok;
}