/// <summary>
/// Creates a one-page placeholder PDF in <paramref name="workingFolder"/> stating
/// that the given handle/file has no pdf format, and returns its full path.
/// </summary>
private static string GeneratePlaceholderFile(string workingFolder, string fileName, string documentHandle)
{
    string placeholderPath = Path.Combine(workingFolder, Guid.NewGuid() + ".pdf");
    using (PdfSharp.Pdf.PdfDocument document = new PdfSharp.Pdf.PdfDocument())
    {
        // Single empty page carrying the explanatory text.
        PdfSharp.Pdf.PdfPage page = document.AddPage();
        using (XGraphics graphics = XGraphics.FromPdfPage(page))
        {
            var font = new XFont("Verdana", 14, XFontStyle.Bold);
            var message = "Handle " + documentHandle + " fileName " + fileName + " has no pdf format";
            graphics.DrawString(
                message,
                font,
                XBrushes.Black,
                new XRect(0, 0, page.Width, page.Height),
                XStringFormats.TopCenter);
        }
        document.Save(placeholderPath);
    }
    return placeholderPath;
}
/// <summary>
/// Writes <paramref name="fileContent"/> to a temp file named <paramref name="fileName"/>,
/// runs the Tika analyzer on it and records a pass/fail entry in <paramref name="retValue"/>.
/// </summary>
private static void TestFile(
    List<PollerTestResult> retValue,
    ITikaAnalyzer analyzer,
    String fileName,
    String type,
    String expected,
    Byte[] fileContent)
{
    var targetPath = Path.Combine(Path.GetTempPath(), fileName);

    // Start from a clean slate: drop any leftover from a previous run.
    if (File.Exists(targetPath))
    {
        File.Delete(targetPath);
    }
    File.WriteAllBytes(targetPath, fileContent);

    try
    {
        string content = analyzer.GetHtmlContent(targetPath, "");
        var passed = content.Contains(expected);
        retValue.Add(passed
            ? new PollerTestResult(true, type + " conversion")
            : new PollerTestResult(false, type + " conversion: wrong content"));
    }
    catch (Exception ex)
    {
        retValue.Add(new PollerTestResult(false, type + " conversion: " + ex.Message));
    }
}
/// <summary>
/// Uploading a document whose path exceeds the classic 250-character limit
/// must still produce exactly one document descriptor in the read model.
/// </summary>
public async Task can_upload_document_with_name_greater_than_250_char()
{
    var docHandle = DocumentHandle.FromString("Pdf_3");

    // Build a path just above the 250-char threshold.
    String oversizedPath = Path.Combine(
        Path.GetTempPath(),
        "_lfn" + new string('X', 240) + ".pdf");
    if (!File.Exists(oversizedPath))
    {
        File.Copy(TestConfig.PathToDocumentPdf, oversizedPath);
    }

    await _documentStoreClient.UploadAsync(oversizedPath, docHandle);

    // Let the background projection polling catch up.
    await UpdateAndWaitAsync().ConfigureAwait(false);

    // Verify the read model saw exactly one descriptor.
    var accessor = ContainerAccessor.Instance.Resolve<ITenantAccessor>();
    var currentTenant = accessor.GetTenant(new TenantId(TestConfig.Tenant));
    var reader = currentTenant.Container.Resolve<IMongoDbReader<DocumentDescriptorReadModel, DocumentDescriptorId>>();
    var documentCount = reader.AllUnsorted.Count();
    Assert.AreEqual(1, documentCount);
}
/// <summary>
/// Queues every file found under sourceFolder (recursively) as a document
/// import task for the "docs" tenant, writing one task file per document.
/// </summary>
public void queue_folder()
{
    var sourceFolder = @"c:\Downloads\video\";
    var taskFolder = @"c:\temp\dsqueue";
    var files = Directory.GetFiles(sourceFolder, "*.*", SearchOption.AllDirectories);
    var docs = new DocumentStoreServiceClient(new Uri("http://ds"), "docs");
    // BUGFIX: counter++ is not atomic and Parallel.ForEach runs the body on
    // multiple threads, so the original code could hand out duplicate handles.
    // Interlocked.Increment starting from 0 reproduces the 1-based sequence.
    var counter = 0;
    Parallel.ForEach(files, file =>
    {
        var handle = "import_" + Interlocked.Increment(ref counter);
        var taskDoc = docs.CreateDocumentImportData(
            Guid.NewGuid(),
            file,
            Path.GetFileName(file),
            new DocumentHandle(handle)
        );
        taskDoc.DeleteAfterImport = false;

        // Ticks suffix keeps task file names unique across runs.
        var docsFile = Path.Combine(taskFolder, "doc_" + handle + "_" + DateTime.Now.Ticks);
        docs.QueueDocumentImport(taskDoc, docsFile);
    });
}
/// <summary>
/// Poller handler: converts the job's file to pdf (when a converter for its
/// extension exists) and attaches the result to the document.
/// </summary>
protected async override Task<ProcessResult> OnPolling(
    Shared.Jobs.PollerJobParameters parameters,
    string workingFolder)
{
    Logger.DebugFormat("Downloaded file {0} to be converted to pdf", parameters.FileName);
    var matchingConverter = Converters.FirstOrDefault(c => c.CanConvert(parameters.FileName));
    if (matchingConverter == null)
    {
        Logger.InfoFormat("No converter for extension {0}", Path.GetExtension(parameters.FileName));
        return ProcessResult.Ok;
    }

    // Only download the blob once we know some converter can produce a pdf.
    string sourceFile = await DownloadBlob(
        parameters.TenantId,
        parameters.JobId,
        parameters.FileName,
        workingFolder).ConfigureAwait(false);

    string convertedPdf = Path.Combine(workingFolder, Guid.NewGuid() + ".pdf");
    if (!matchingConverter.Convert(sourceFile, convertedPdf))
    {
        Logger.ErrorFormat("Error converting file {0} to pdf", sourceFile);
        return ProcessResult.Fail($"Error converting file {sourceFile} to pdf");
    }

    await AddFormatToDocumentFromFile(
        parameters.TenantId,
        parameters.JobId,
        new DocumentFormat(DocumentFormats.Pdf),
        convertedPdf,
        new Dictionary<string, object>()).ConfigureAwait(false);
    return ProcessResult.Ok;
}
/// <summary>
/// Poller handler: restores the blob's real file name, converts the html file
/// to pdf and attaches the pdf format to the document.
/// </summary>
protected async override Task<ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
{
    string downloadedBlob = await DownloadBlob(
        parameters.TenantId,
        parameters.JobId,
        parameters.FileName,
        workingFolder).ConfigureAwait(false);

    // The blob is stored under an opaque id; copy it to its original name so
    // the converter sees the right extension.
    String realFileName = Path.Combine(
        Path.GetDirectoryName(downloadedBlob),
        parameters.All[JobKeys.FileName]);
    Logger.DebugFormat("Move blob id {0} to real filename {1}", downloadedBlob, realFileName);
    if (File.Exists(realFileName))
    {
        File.Delete(realFileName);
    }
    File.Copy(downloadedBlob, realFileName);

    var htmlConverter = new HtmlToPdfConverterFromDiskFile(realFileName, base.JobsHostConfiguration)
    {
        Logger = Logger
    };
    var pdfPath = htmlConverter.Run(parameters.TenantId, parameters.JobId);

    await AddFormatToDocumentFromFile(
        parameters.TenantId,
        parameters.JobId,
        new DocumentFormat(DocumentFormats.Pdf),
        pdfPath,
        new Dictionary<string, object>()).ConfigureAwait(false);
    return ProcessResult.Ok;
}
/// <summary>
/// Returns (creating it if needed) the temp working folder for the given
/// tenant/blob pair, rooted at the configured "TEMP" path.
/// </summary>
/// <param name="tenantId">Tenant owning the work; becomes a path segment.</param>
/// <param name="blobId">Blob being processed; becomes a path segment.</param>
/// <returns>Full path of the (ensured) working folder.</returns>
/// <exception cref="ArgumentNullException">tenantId or blobId is null.</exception>
public string GetWorkingFolder(string tenantId, string blobId)
{
    // nameof keeps parameter names refactor-safe, consistent with the guard
    // clauses used elsewhere in this code base (e.g. Download).
    if (tenantId == null)
    {
        throw new ArgumentNullException(nameof(tenantId));
    }
    if (blobId == null)
    {
        throw new ArgumentNullException(nameof(blobId));
    }
    return EnsureFolder(Path.Combine(GetConfigValue("TEMP"), tenantId, blobId));
}
// Test fixture setup: builds a very long base folder (230 'a' chars under the
// temp path) to exercise long-path handling, copies a queue task file and a
// Word document into it, wires NSubstitute fakes for tenant/container/blob
// store, clears the relevant Mongo test collections, and builds the
// ImportFormatFromFileQueue under test (task files kept after import).
public void SetUp()
{
    longFolderName = Path.Combine(Path.GetTempPath(), new String('a', 230));
    _blobId = new BlobId(_originalFormat, 1);
    _pathToTask = Path.Combine(longFolderName, "File_1.dsimport");
    _fileToImport = Path.Combine(longFolderName, "A Word Document.docx");
    // NOTE(review): casing differs from _fileToImport ("A word document.docx"
    // vs "A Word Document.docx") — presumably intentional to test
    // case-insensitive matching; confirm.
    _fileUri = new Uri(Path.Combine(longFolderName, "A word document.docx"));
    ClearQueueTempFolder();
    Directory.CreateDirectory(longFolderName);
    File.Copy(Path.Combine(TestConfig.DocumentsFolder, "Queue\\File_1.dsimport"), _pathToTask);
    File.Copy(TestConfig.PathToWordDocument, _fileToImport);

    // Fake the tenant infrastructure so the queue resolves test doubles.
    var accessor = Substitute.For<ITenantAccessor>();
    var tenant = Substitute.For<ITenant>();
    tenant.Id.Returns(new TenantId("tests"));
    var container = Substitute.For<IWindsorContainer>();
    _commandBus = Substitute.For<ICommandBus>();
    var identityGenerator = Substitute.For<IIdentityGenerator>();
    _blobstore = Substitute.For<IBlobStore>();
    // Any upload of the original format yields the fixed test blob id.
    _blobstore.Upload(Arg.Is(_originalFormat), Arg.Any<string>()).Returns(_blobId);
    _blobstore.Upload(Arg.Is(_originalFormat), Arg.Any<FileNameWithExtension>(), Arg.Any<Stream>()).Returns(_blobId);

    accessor.GetTenant(_testTenant).Returns(tenant);
    accessor.Current.Returns(tenant);
    tenant.Container.Returns(container);

    container.Resolve<IBlobStore>().Returns(_blobstore);
    container.Resolve<IIdentityGenerator>().Returns(identityGenerator);
    container.Resolve<IMongoDatabase>().Returns(MongoDbTestConnectionProvider.ReadModelDb);

    // Clean persistent state so each run starts fresh.
    var collection = MongoDbTestConnectionProvider.ReadModelDb.GetCollection<ImportFailure>("sys.importFailures");
    collection.Drop();
    DocumentStoreTestConfiguration config = new DocumentStoreTestConfiguration(tenantId: "tests");
    config.SetFolderToMonitor(longFolderName);
    var sysDb = config.TenantSettings.Single(t => t.TenantId == "tests").Get<IMongoDatabase>("system.db");
    sysDb.Drop();

    _queue = new ImportFormatFromFileQueue(config, accessor, _commandBus)
    {
        Logger = new ConsoleLogger()
    };
    // Keep task files around so the test can inspect them afterwards.
    _queue.DeleteTaskFileAfterImport = false;
}
/// <summary>
/// Copies the blob's file into <paramref name="folder"/>, appending a numeric
/// " (n)" suffix when a file with the same name already exists, and returns
/// the destination path.
/// </summary>
public string Download(BlobId blobId, string folder)
{
    if (blobId == null)
        throw new ArgumentNullException(nameof(blobId));
    if (String.IsNullOrEmpty(folder))
        throw new ArgumentNullException(nameof(folder));
    if (!Directory.Exists(folder))
        throw new ArgumentException($"folder {folder} does not exists", nameof(folder));

    var descriptor = _mongodDbFileSystemBlobDescriptorStorage.FindOneById(blobId);
    if (descriptor == null)
        throw new ArgumentException($"Descriptor for {blobId} not found in {_mongodDbFileSystemBlobDescriptorStorage.GetType().Name}");

    var localFileName = _directoryManager.GetFileNameFromBlobId(blobId, descriptor.FileNameWithExtension);
    if (!File.Exists(localFileName))
    {
        // Descriptor without backing file: the store is inconsistent.
        Logger.Error($"Blob {blobId} has descriptor, but blob file {localFileName} not found in the system.");
        throw new ArgumentException($"Blob {blobId} not found");
    }

    var originalFileName = descriptor.FileNameWithExtension.ToString();
    string destinationFileName = Path.Combine(folder, originalFileName);

    // Never overwrite: probe "name (1).ext", "name (2).ext", … until free.
    Int32 suffix = 1;
    while (File.Exists(destinationFileName))
    {
        var candidate = Path.GetFileNameWithoutExtension(originalFileName) + $" ({suffix++})";
        destinationFileName = Path.Combine(folder, candidate) + Path.GetExtension(originalFileName);
    }

    File.Copy(localFileName, destinationFileName);
    if (Logger.IsDebugEnabled)
    {
        Logger.Debug($"Blob {blobId} downloaded in folder {folder} with name {destinationFileName}");
    }
    return destinationFileName;
}
/// <summary>
/// Downloads the pdf format of <paramref name="documentHandle"/> into a
/// Guid-named temp file under <paramref name="workingFolder"/> and records it
/// in <paramref name="files"/>. Always returns true on success.
/// </summary>
/// <param name="pdfExists">Kept for interface compatibility; the return value
/// is always true after a successful download.</param>
private static async Task<bool> InnerGetPdf(string workingFolder, DocumentStoreServiceClient client, List<FileToComposeData> files, string handle, DocumentHandle documentHandle, bool pdfExists)
{
    var pdfData = client.OpenRead(documentHandle, DocumentFormats.Pdf);
    var tempFile = Path.Combine(workingFolder, Guid.NewGuid() + ".pdf");
    // BUGFIX: FileMode.Create truncates any pre-existing file (OpenOrCreate
    // would leave stale trailing bytes), and the remote stream returned by
    // OpenStream() was never disposed — wrap it in a using as well.
    using (var downloaded = new FileStream(tempFile, FileMode.Create, FileAccess.Write))
    using (var stream = await pdfData.OpenStream().ConfigureAwait(false))
    {
        stream.CopyTo(downloaded);
    }
    files.Add(FileToComposeData.FromDownloadedPdf(tempFile, handle));
    return true;
}
/// <summary>
/// Writes <paramref name="fileContent"/> to a temp file and runs the
/// pdf-to-image task on it, recording in <paramref name="retValue"/> whether
/// the image callback was ever invoked.
/// </summary>
private static void TestFile(
    List<PollerTestResult> retValue,
    CreateImageFromPdfTask task,
    String fileName,
    Byte[] fileContent)
{
    var pdfPath = Path.Combine(Path.GetTempPath(), fileName);
    if (File.Exists(pdfPath))
    {
        File.Delete(pdfPath);
    }
    File.WriteAllBytes(pdfPath, fileContent);

    try
    {
        // First page only, rendered as jpg at 150 dpi.
        var convertParams = new CreatePdfImageTaskParams()
        {
            Dpi = 150,
            FromPage = 1,
            Pages = 1,
            Format = CreatePdfImageTaskParams.ImageFormat.Jpg,
        };

        Boolean callbackInvoked = false;
        var conversion = task.Run(
            pdfPath,
            convertParams,
            (i, s) =>
            {
                callbackInvoked = true;
                return Task.FromResult<Boolean>(true);
            }
        );
        conversion.Wait();

        retValue.Add(callbackInvoked
            ? new PollerTestResult(true, "Pdf to Jpg")
            : new PollerTestResult(false, "Pdf to Jpg"));
    }
    catch (Exception ex)
    {
        retValue.Add(new PollerTestResult(false, "Pdf to Jpg: " + ex.Message));
    }
}
/// <summary>
/// Poller handler: downloads the job's image blob once, then produces one
/// resized thumbnail per requested size and attaches each as a
/// "thumb.&lt;size&gt;" format of the document.
/// </summary>
protected async override Task<ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
{
    var fileExtension = parameters.All[JobKeys.ThumbnailFormat];
    ImageFormat format = GetFormatFromExtension(fileExtension);
    var sizesAsString = parameters.All[JobKeys.Sizes];
    var imageSizes = SizeInfoHelper.Deserialize(sizesAsString);

    Logger.DebugFormat("Starting resize job for {0} - {1}", parameters.JobId, sizesAsString);

    // FIX: ConfigureAwait(false) added for consistency with the other pollers
    // in this code base; no synchronization context is needed here.
    string pathToFile = await DownloadBlob(
        parameters.TenantId,
        parameters.JobId,
        parameters.FileName,
        workingFolder).ConfigureAwait(false);

    using (var sourceStream = File.OpenRead(pathToFile))
    using (var pageStream = new MemoryStream())
    {
        // Buffer the source once; each size rewinds the buffer instead of
        // re-reading the file from disk.
        sourceStream.CopyTo(pageStream);
        foreach (var size in imageSizes)
        {
            Logger.DebugFormat("Resize job for {0} - {1}", parameters.JobId, size.Name);
            pageStream.Seek(0, SeekOrigin.Begin);
            var fileFormat = new Client.Model.DocumentFormat("thumb." + size.Name);
            string resizeImagePath = Path.Combine(
                workingFolder,
                String.Format("{0}.{1}.{2}", Path.GetFileNameWithoutExtension(parameters.FileName), size.Name, fileExtension));
            resizeImagePath = SanitizeFileNameForLength(resizeImagePath);
            // FIX: File.Create truncates a pre-existing file; File.OpenWrite
            // would leave stale trailing bytes when the new image is smaller.
            using (var outStream = File.Create(resizeImagePath))
            {
                Logger.DebugFormat("Resizing {0}", parameters.JobId);
                ImageResizer.Shrink(pageStream, outStream, size.Width, size.Height, format);
            }
            await AddFormatToDocumentFromFile(
                parameters.TenantId,
                parameters.JobId,
                fileFormat,
                resizeImagePath,
                new Dictionary<string, object>()).ConfigureAwait(false);
        }
    }
    Logger.DebugFormat("Ended resize job for {0} - {1}", parameters.JobId, sizesAsString);
    return ProcessResult.Ok;
}
/// <summary>
/// Persists one raster page of the job's file to the working folder and
/// registers it on the document as a RasterImage format. Always returns true.
/// </summary>
public async Task<Boolean> Write(String workerFolder, PollerJobParameters parameters, String format, int pageIndex, Stream stream)
{
    // "name.page_N.format" — truncated if it would exceed path-length limits.
    var pageFileName = Path.GetFileNameWithoutExtension(parameters.FileName) + ".page_" + pageIndex + "." + format;
    var rawFileName = SanitizeFileNameForLength(Path.Combine(workerFolder, pageFileName));

    using (var outStream = File.OpenWrite(rawFileName))
    {
        stream.CopyTo(outStream);
    }

    await AddFormatToDocumentFromFile(
        parameters.TenantId,
        parameters.JobId,
        new DocumentFormat(DocumentFormats.RasterImage),
        rawFileName,
        new Dictionary<string, object>()).ConfigureAwait(false);
    return true;
}
// Builds the full on-disk path (directory tree + "<id>.<originalName>") where
// the raw payload of a blob is stored, creating the directory tree if needed.
// The id is divided by 1000 (max ~1000 blobs per leaf folder), zero-padded to
// 15 digits, and split into fixed-length segments to keep folders small.
private String GetRawFileNameFromBlobId(BlobId blobId, String fileName)
{
    var id = blobId.Id;
    // Strip the last 3 digits (1000 ids per leaf) and pad to a fixed depth so
    // every id produces the same directory-tree shape.
    var stringPadded = String.Format("{0:D15}", id / 1000);
    StringBuilder directoryName = new StringBuilder(15);
    for (int i = 0; i < Math.Min(stringPadded.Length, 15); i++)
    {
        directoryName.Append(stringPadded[i]);
        // Insert a path separator after every _folderPrefixLength digits.
        if (i % _folderPrefixLength == (_folderPrefixLength - 1))
        {
            directoryName.Append(System.IO.Path.DirectorySeparatorChar);
        }
    }
    var finalDirectory = Path.Combine(BaseDirectory, blobId.Format, directoryName.ToString());
    Directory.EnsureDirectory(finalDirectory);
    // NOTE(review): this concatenation relies on finalDirectory ending with a
    // separator, which holds only when _folderPrefixLength divides 15 (3, 5,
    // 15) so the loop appends a trailing separator — confirm the configured
    // _folderPrefixLength value.
    return (finalDirectory + id + "." + Path.GetFileName(fileName));
}
/// <summary>
/// Create a series of subdirectories that avoid cluttering thousands
/// of files inside the very same folder.
/// The logic is the following: we want at most 1000 files in a folder, so
/// we divide the id by 1000 and pad to 15 digits, then we subdivide
/// the resulting number into blocks of 3 digits; each folder will contain
/// at most 1000 folders or files.
/// </summary>
/// <param name="blobId">Blob whose numeric Id and Format determine the path.</param>
/// <returns>Full path of the blob file ("&lt;tree&gt;&lt;id&gt;.blob").</returns>
public String GetFileNameFromBlobId(BlobId blobId)
{
    var id = blobId.Id;
    // Strip the last 3 digits (1000 blobs per leaf folder) and pad to a fixed
    // 15-digit width so every id yields the same tree depth.
    var stringPadded = String.Format("{0:D15}", id / 1000);
    StringBuilder directoryName = new StringBuilder(15);
    for (int i = 0; i < Math.Min(stringPadded.Length, 15); i++)
    {
        directoryName.Append(stringPadded[i]);
        // Separator after every 3rd digit; the 15th digit also closes the
        // string with a trailing separator, which the return concatenation
        // below relies on.
        if (i % 3 == 2)
        {
            directoryName.Append(System.IO.Path.DirectorySeparatorChar);
        }
    }
    var finalDirectory = Path.Combine(_baseDirectory, blobId.Format, directoryName.ToString());
    Directory.EnsureDirectory(finalDirectory);
    // finalDirectory already ends with a separator (see loop above).
    return (finalDirectory + id + ".blob");
}
/// <summary>
/// Self-test for the VLC thumbnail poller: verifies the vlc executable can be
/// located and that a thumbnail can be extracted from a sample video.
/// </summary>
/// <returns>One PollerTestResult per check performed.</returns>
public List<PollerTestResult> Execute()
{
    List<PollerTestResult> retValue = new List<PollerTestResult>();
    String format = "png";
    Int32 secondsOffset = 4;
    String vlcExecutable = Helper.GetExecutableLocation();
    if (vlcExecutable == null)
    {
        retValue.Add(new PollerTestResult(false, "Executable location, use app settings vlc_location"));
        return retValue;
    }
    else
    {
        retValue.Add(new PollerTestResult(true, "Executable location, "));
    }
    try
    {
        var worker = new VlcCommandLineThumbnailCreator(vlcExecutable, format, NullLogger.Instance);
        var tempFile = Path.Combine(Path.GetTempPath(), "video.mp4");
        if (File.Exists(tempFile))
        {
            File.Delete(tempFile);
        }
        File.WriteAllBytes(tempFile, TestFiles.video);
        // BUGFIX: pass the declared secondsOffset instead of a magic 4.
        var thumb = worker.CreateThumbnail(tempFile, Path.GetTempPath(), secondsOffset);
        // BUGFIX: the original asserted on tempFile, which is always non-empty,
        // so this check could never fail; test the thumbnail path instead.
        retValue.Add(new PollerTestResult(
            !String.IsNullOrEmpty(thumb),
            "video thumb extraction: "));
    }
    catch (Exception ex)
    {
        retValue.Add(new PollerTestResult(false, "video thumb extraction: " + ex.Message));
    }
    return retValue;
}
/// <summary>
/// Downloads the blob attached to a job into the working folder and returns
/// the local file path (truncated when the combined name is too long).
/// </summary>
protected async Task<String> DownloadBlob(
    String tenantId,
    String jobId,
    String originalFileName,
    String workingFolder)
{
    // Too-long names must be truncated: tika and other libraries cannot open
    // paths near the 260-char limit, and some tasks append extra suffixes
    // (e.g. ".html") on top of this name.
    String localPath = SanitizeFileNameForLength(Path.Combine(workingFolder, originalFileName));

    DocumentStoreServiceClient client = GetDocumentStoreClient(tenantId);
    var blobReader = client.OpenBlobIdForRead(this.QueueName, jobId);
    using (var fileStream = new FileStream(localPath, FileMode.OpenOrCreate, FileAccess.Write))
    {
        var remoteStream = await blobReader.OpenStream().ConfigureAwait(false);
        remoteStream.CopyTo(fileStream);
    }
    Logger.DebugFormat("Downloaded blob for job {0} for tenant {1} in local file {2}", jobId, tenantId, localPath);
    return localPath;
}
// Verifies that QueueDocumentImport serializes a DocumentImportData task to a
// ".dsimport" file whose JSON payload matches the expected text exactly.
public void should_serialize_document_import_data()
{
    var fname = Path.Combine(TestConfig.TempFolder, "a_file_to_import");
    var client = new DocumentStoreServiceClient(new Uri("http://ds"), "test");
    var did = client.CreateDocumentImportData(TaskId, "c:\\temp\\a file.docx", "a file.docx", Doc);
    client.QueueDocumentImport(did, fname);
    // The client appends the ".dsimport" extension to the queue file name.
    Assert.IsTrue(File.Exists(fname + ".dsimport"));
    // Expected serialized payload; compared byte-for-byte with the file, so
    // any change to the serializer settings will (intentionally) break this.
    const string expected = @"{ ""TaskId"": ""9a29d730-f57a-41e4-92ba-55b7d99712a2"", ""Uri"": ""c:\\temp\\a file.docx"", ""FileName"": ""a file.docx"", ""Handle"": ""doc"", ""Format"": ""original"", ""Tenant"": ""test"", ""CustomData"": null, ""DeleteAfterImport"": false }";
    Assert.AreEqual(expected, File.ReadAllText(fname + ".dsimport"));
}
// Core polling loop of a job worker thread: repeatedly pulls the next queued
// job, prepares a clean per-job working folder, runs OnPolling, then marks the
// job executed (or re-queues it when the result asks for a later retry).
// Exits when no job is available or when enough sibling threads have stopped
// that the whole pool should be restarted.
private void ExecuteJobCore()
{
    do
    {
        //if half of the task thread finished working, we should end all the pool and restart
        if (ThreadNumber > 2 && _numOfPollerTaskActive < (ThreadNumber / 2))
        {
            //This can happen because jobs are generated not in block, if we have ex 6 threads
            //base jobs started 6 tasks, then if in a moment only one task remain only one task remain active
            //then if the queue manager queue 100 jobs, we have only one thread active. This condition
            //stops the poll if half of the threads are active, so we need to restart polling with all the tasks.
            return;
        }
        String workingFolder = null;
        QueuedJobDto nextJob = DsGetNextJob();
        if (nextJob == null)
        {
            // Queue drained: this worker retires and decrements the active count.
            System.Threading.Interlocked.Decrement(ref _numOfPollerTaskActive);
            return;
        }
        // Tag every log line produced while handling this job with its id.
        Logger.ThreadProperties["job-id"] = nextJob.Id;
        var baseParameters = ExtractJobParameters(nextJob);
        //remember to enter the right tenant.
        workingFolder = Path.Combine(
            JobsHostConfiguration.GetWorkingFolder(baseParameters.TenantId, GetType().Name),
            baseParameters.JobId
        );
        // Always start from an empty working folder for this job.
        if (Directory.Exists(workingFolder))
        {
            Directory.Delete(workingFolder, true);
        }
        Directory.CreateDirectory(workingFolder);
        try
        {
            // NOTE(review): task.Result blocks this thread until the async
            // handler completes (sync-over-async by design of this worker);
            // failures surface as AggregateException, handled below.
            var task = OnPolling(baseParameters, workingFolder);
            var result = task.Result;
            if (result.Result)
            {
                Logger.DebugFormat("Successfully executed Job: {0}", nextJob.Id);
            }
            else
            {
                Logger.ErrorFormat("Job {0} completed with errors: {1} with result", nextJob.Id, result.ErrorMessage);
            }
            //The execution if failed can be posticipated to future time, probably because the job can retry after a certain
            //period of time.
            if (!result.Posticipate)
            {
                DsSetJobExecuted(QueueName, nextJob.Id, result.ErrorMessage, result.ParametersToModify);
            }
            else
            {
                // Re-queue for a later retry at the timestamp the handler chose.
                DsReQueueJob(QueueName, nextJob.Id, result.ErrorMessage, result.PosticipateExecutionTimestamp, result.ParametersToModify);
            }
        }
        catch (AggregateException aex)
        {
            // Unwrap and log every inner exception, then fail the job with the
            // combined message.
            Logger.ErrorFormat(aex, "Error executing queued job {0} on tenant {1} - {2}",
                nextJob.Id,
                nextJob.Parameters[JobKeys.TenantId],
                aex?.InnerExceptions?[0]?.Message);
            StringBuilder aggregateMessage = new StringBuilder();
            aggregateMessage.Append(aex.Message);
            foreach (var ex in aex.InnerExceptions)
            {
                var errorMessage = String.Format("Inner error queued job {0} queue {1}: {2}", nextJob.Id, this.QueueName, ex.Message);
                LogExceptionAndAllInnerExceptions(ex, errorMessage);
                aggregateMessage.Append(errorMessage);
            }
            DsSetJobExecuted(QueueName, nextJob.Id, aggregateMessage.ToString(), null);
        }
        catch (Exception ex)
        {
            var errorMessage = String.Format("Error executing queued job {0} on tenant {1}", nextJob.Id, nextJob.Parameters[JobKeys.TenantId]);
            LogExceptionAndAllInnerExceptions(ex, errorMessage);
            DsSetJobExecuted(QueueName, nextJob.Id, ex.Message, null);
        }
        finally
        {
            // Clean up the per-job folder and drop the log tag whatever happened.
            DeleteWorkingFolder(workingFolder);
            Logger.ThreadProperties["job-id"] = null;
        }
    } while (true); //Exit is in the internal loop
}
/// <summary>
/// Extracts an .eml message into a per-job folder, normalizes the html body
/// (rewriting img src paths to the extraction folder), zips the folder into an
/// ".ezip" archive in the working folder and returns the archive path.
/// </summary>
/// <param name="jobId">Used as the extraction sub-folder name.</param>
/// <param name="pathToEml">Source .eml file.</param>
/// <param name="workingFolder">Folder that will receive the .ezip file.</param>
/// <returns>Full path of the created .ezip archive.</returns>
public string Convert(String jobId, string pathToEml, string workingFolder)
{
    Logger.DebugFormat("Coverting {0} in working folder {1}", pathToEml, workingFolder);
    var reader = new Reader();

    var outFolder = Path.Combine(workingFolder, jobId);
    Logger.DebugFormat("Creating message working folder is {0}", outFolder);
    Directory.CreateDirectory(outFolder);

    Logger.Debug("Extracting files");
    var files = reader.ExtractToFolder(pathToEml, outFolder);
    if (Logger.IsDebugEnabled)
    {
        foreach (var file in files)
        {
            Logger.DebugFormat("\t{0}", Path.GetFileName(file));
        }
        Logger.DebugFormat("Total files {0}", files.Length);
    }

    // Prefer the .htm/.html body; fall back to wrapping the .txt body in
    // <pre>, then to a static placeholder when no usable content exists.
    var htmlFileName = files.FirstOrDefault(x => x.EndsWith(".htm", StringComparison.OrdinalIgnoreCase))
        ?? files.FirstOrDefault(x => x.EndsWith(".html", StringComparison.OrdinalIgnoreCase));
    if (htmlFileName == null)
    {
        var textFile = files.FirstOrDefault(x => x.EndsWith(".txt", StringComparison.OrdinalIgnoreCase));
        if (textFile != null)
        {
            htmlFileName = textFile + ".html";
            var textcontent = File.ReadAllText(textFile);
            File.WriteAllText(htmlFileName, String.Format("<html><body><pre>{0}</pre></body></html>", textcontent));
        }
        else
        {
            // BUGFIX: write the placeholder inside outFolder. The original used
            // a bare relative name, so the file landed in the process current
            // directory and was excluded from the zip built from outFolder.
            htmlFileName = Path.Combine(outFolder, "contentmissing.html");
            File.WriteAllText(htmlFileName, "<html>No content found in mail.</html>");
        }
    }

    var htmlNameWithoutExtension = Path.GetFileNameWithoutExtension(htmlFileName);
    var htmlContent = File.ReadAllText(htmlFileName);
    var dirInfoFullName = new DirectoryInfo(outFolder).FullName;
    // Rewrite img src attributes so they resolve inside the extraction folder.
    htmlContent = Regex.Replace(
        htmlContent,
        @"src=""(?<src>.+?)""",
        new MatchEvaluator((m) => NormalizeImgEvaluator(m, dirInfoFullName)),
        RegexOptions.IgnoreCase);
    File.WriteAllText(htmlFileName, htmlContent);

    var pathToZip = Path.Combine(workingFolder, htmlNameWithoutExtension + ".ezip");
    Logger.DebugFormat("New zip file is {0}", pathToZip);
    if (File.Exists(pathToZip))
    {
        Logger.DebugFormat("Deleting previous file: {0}", pathToZip);
        File.Delete(pathToZip);
    }
    Logger.DebugFormat("Creating new file: {0}", pathToZip);
    ZipFile.CreateFromDirectory(outFolder, pathToZip);
    // BUGFIX: the original format string had no {0} placeholder, so outFolder
    // was silently dropped from this log line.
    Logger.DebugFormat("Deleting message working folder {0}", outFolder);
    Directory.Delete(outFolder, true);
    Logger.DebugFormat(
        "Convesion done {0} => {1}",
        pathToEml,
        pathToZip
    );
    return pathToZip;
}