public string Download(BlobId blobId, string folder)
        {
            if (blobId == null)
            {
                throw new ArgumentNullException(nameof(blobId));
            }

            if (String.IsNullOrEmpty(folder))
            {
                throw new ArgumentNullException(nameof(folder));
            }

            if (!Directory.Exists(folder))
            {
                throw new ArgumentException($"folder {folder} does not exists", nameof(folder));
            }

            var descriptor = _mongodDbFileSystemBlobDescriptorStorage.FindOneById(blobId);

            if (descriptor == null)
            {
                throw new ArgumentException($"Descriptor for {blobId} not found in {_mongodDbFileSystemBlobDescriptorStorage.GetType().Name}");
            }

            var localFileName = _directoryManager.GetFileNameFromBlobId(blobId, descriptor.FileNameWithExtension);

            if (!File.Exists(localFileName))
            {
                Logger.Error($"Blob {blobId} has descriptor, but blob file {localFileName} not found in the system.");
                throw new ArgumentException($"Blob {blobId} not found");
            }

            var    originalFileName    = descriptor.FileNameWithExtension.ToString();
            string destinationFileName = Path.Combine(folder, originalFileName);
            Int32  uniqueId            = 1;

            while (File.Exists(destinationFileName))
            {
                destinationFileName = Path.Combine(folder, Path.GetFileNameWithoutExtension(originalFileName) + $" ({uniqueId++})") + Path.GetExtension(originalFileName);
            }

            File.Copy(localFileName, destinationFileName);

            if (Logger.IsDebugEnabled)
            {
                Logger.Debug($"Blob {blobId} downloaded in folder {folder} with name {destinationFileName}");
            }
            return(destinationFileName);
        }
示例#2
0
        private Process GetLocalProcessForQueue(String queueId, String executableName)
        {
            var processFileName = Path.GetFileNameWithoutExtension(executableName);
            var processes       = Process.GetProcessesByName(processFileName, Environment.MachineName);

            foreach (Process process in processes)
            {
                var cmdLine = GetCommandLine(process);
                if (cmdLine.Contains("/queue:" + queueId))
                {
                    return(process);
                }
            }
            return(null);
        }
示例#3
0
        protected async override Task <ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
        {
            var         fileExtension = parameters.All[JobKeys.ThumbnailFormat];
            ImageFormat format        = GetFormatFromExtension(fileExtension);
            var         sizesAsString = parameters.All[JobKeys.Sizes];
            var         imageSizes    = SizeInfoHelper.Deserialize(sizesAsString);

            Logger.DebugFormat("Starting resize job for {0} - {1}", parameters.JobId, sizesAsString);

            string pathToFile = await DownloadBlob(parameters.TenantId, parameters.JobId, parameters.FileName, workingFolder);


            using (var sourceStream = File.OpenRead(pathToFile))
            {
                using (var pageStream = new MemoryStream())
                {
                    sourceStream.CopyTo(pageStream);

                    foreach (var size in imageSizes)
                    {
                        Logger.DebugFormat("Resize job for {0} - {1}", parameters.JobId, size.Name);
                        pageStream.Seek(0, SeekOrigin.Begin);
                        var fileFormat = new Client.Model.DocumentFormat("thumb." + size.Name);

                        string resizeImagePath = Path.Combine(
                            workingFolder,
                            String.Format("{0}.{1}.{2}", Path.GetFileNameWithoutExtension(parameters.FileName), size.Name, fileExtension));
                        resizeImagePath = SanitizeFileNameForLength(resizeImagePath);
                        using (var outStream = File.OpenWrite(resizeImagePath))
                        {
                            Logger.DebugFormat("Resizing {0}", parameters.JobId);
                            ImageResizer.Shrink(pageStream, outStream, size.Width, size.Height, format);
                        }
                        await AddFormatToDocumentFromFile(
                            parameters.TenantId,
                            parameters.JobId,
                            fileFormat,
                            resizeImagePath, new Dictionary <string, object>());
                    }
                }
            }

            Logger.DebugFormat("Ended resize job for {0} - {1}", parameters.JobId, sizesAsString);
            return(ProcessResult.Ok);
        }
示例#4
0
        private async Task <HttpResponseMessage> upload_file(string pathToFile, string documentHandle)
        {
            using (var stream = new FileStream(pathToFile, FileMode.Open))
            {
                var multipartFormDataContent = new MultipartFormDataContent("test")
                {
                    {
                        new StreamContent(stream),
                        Path.GetFileNameWithoutExtension(pathToFile),
                        Path.GetFileName(pathToFile)
                    }
                };

                Controller.Request.Content = multipartFormDataContent;

                return(await Controller.Upload(_tenantId, new DocumentHandle(documentHandle)));
            }
        }
示例#5
0
        public async Task <Boolean> Write(String workerFolder, PollerJobParameters parameters, String format, int pageIndex, Stream stream)
        {
            var rawFileName = Path.Combine(workerFolder, Path.GetFileNameWithoutExtension(parameters.FileName) + ".page_" + pageIndex + "." + format);

            rawFileName = SanitizeFileNameForLength(rawFileName);
            using (var outStream = File.OpenWrite(rawFileName))
            {
                stream.CopyTo(outStream);
            }

            await AddFormatToDocumentFromFile(
                parameters.TenantId,
                parameters.JobId,
                new DocumentFormat(DocumentFormats.RasterImage),
                rawFileName,
                new Dictionary <string, object>()).ConfigureAwait(false);

            return(true);
        }
示例#6
0
        string DownloadLocalCopy(String jobId)
        {
            Logger.DebugFormat("Downloaded {0}", _inputFileName);

            if (IsUnzippedHtmlFile())
            {
                return(_inputFileName);
            }

            var workingFolder = Path.GetDirectoryName(_inputFileName);

            ZipFile.ExtractToDirectory(_inputFileName, workingFolder);
            Logger.DebugFormat("Extracted zip to {0}", workingFolder);

            var originalFileWithoutExtension = Path.GetFileNameWithoutExtension(_inputFileName);

            var    extractedFiles = Directory.GetFiles(workingFolder, "*.htm*");
            String htmlFile       = null;

            if (extractedFiles.Length == 1)
            {
                htmlFile = extractedFiles[0];
            }
            else if (extractedFiles.Length > 1)
            {
                htmlFile = extractedFiles
                           .FirstOrDefault(f => Path.GetFileNameWithoutExtension(f).TrimEnd('.') == originalFileWithoutExtension) ??
                           extractedFiles[0];
            }

            if (htmlFile != null)
            {
                Logger.Debug($"Extracted html from {_inputFileName} is {htmlFile}");
                return(htmlFile);
            }

            var msg = $"Html file not found for {jobId} {_inputFileName}";

            Logger.Error(msg);
            throw new Exception(msg);
        }
        public FileNameWithExtension(string fileNameWithExtension)
        {
            if (String.IsNullOrWhiteSpace(fileNameWithExtension))
            {
                throw new ArgumentNullException("fileNameWithExtension");
            }

            fileNameWithExtension = fileNameWithExtension.Replace("\"", "");

            FileName  = Path.GetFileNameWithoutExtension(fileNameWithExtension);
            Extension = Path.GetExtension(fileNameWithExtension);
            if (fileNameWithExtension.StartsWith("."))
            {
                FileName  = fileNameWithExtension;
                Extension = "";
            }
            if (!String.IsNullOrWhiteSpace(Extension))
            {
                Extension = Extension.Remove(0, 1).ToLowerInvariant();
            }
        }
示例#8
0
        public string Convert(String jobId, string pathToEml, string workingFolder)
        {
            Logger.DebugFormat("Coverting {0} in working folder {1}", pathToEml, workingFolder);

            var reader = new Reader();

            var outFolder = Path.Combine(workingFolder, jobId);

            Logger.DebugFormat("Creating message working folder is {0}", outFolder);

            Directory.CreateDirectory(outFolder);

            Logger.Debug("Extracting files");

            var files = reader.ExtractToFolder(pathToEml, outFolder);

            if (Logger.IsDebugEnabled)
            {
                foreach (var file in files)
                {
                    Logger.DebugFormat("\t{0}", Path.GetFileName(file));
                }
                Logger.DebugFormat("Total files {0}", files.Length);
            }
            var htmlFileName = files.FirstOrDefault(x => x.EndsWith(".htm", StringComparison.OrdinalIgnoreCase)) ??
                               files.FirstOrDefault(x => x.EndsWith(".html", StringComparison.OrdinalIgnoreCase));

            if (htmlFileName == null)
            {
                var textFile = files.FirstOrDefault(x => x.EndsWith(".txt", StringComparison.OrdinalIgnoreCase));
                if (textFile != null)
                {
                    htmlFileName = textFile + ".html";
                    var textcontent = File.ReadAllText(textFile);
                    File.WriteAllText(htmlFileName, String.Format("<html><body><pre>{0}</pre></body></html>", textcontent));
                }
                else
                {
                    htmlFileName = "contentmissing.html";
                    File.WriteAllText(htmlFileName, "<html>No content found in mail.</html>");
                }
            }
            var htmlNameWithoutExtension = Path.GetFileNameWithoutExtension(htmlFileName);

            var htmlContent     = File.ReadAllText(htmlFileName);
            var dirInfoFullName = new DirectoryInfo(outFolder).FullName;

            htmlContent = Regex.Replace(
                htmlContent,
                @"src=""(?<src>.+?)""",
                new MatchEvaluator((m) => NormalizeImgEvaluator(m, dirInfoFullName)),
                RegexOptions.IgnoreCase);
            File.WriteAllText(htmlFileName, htmlContent);

            var pathToZip = Path.Combine(workingFolder, htmlNameWithoutExtension + ".ezip");

            Logger.DebugFormat("New zip file is {0}", pathToZip);

            if (File.Exists(pathToZip))
            {
                Logger.DebugFormat("Deleting previous file: {0}", pathToZip);
                File.Delete(pathToZip);
            }

            Logger.DebugFormat("Creating new file: {0}", pathToZip);
            ZipFile.CreateFromDirectory(outFolder, pathToZip);

            Logger.DebugFormat("Deleting message working folder", outFolder);
            Directory.Delete(outFolder, true);

            Logger.DebugFormat(
                "Convesion done {0} => {1}",
                pathToEml,
                pathToZip
                );
            return(pathToZip);
        }
        public async Task <Boolean> Run(
            String pathToFile,
            CreatePdfImageTaskParams createPdfImageTaskParams,
            Func <int, Stream, Task <Boolean> > pageWriter)
        {
            String tempFileName = null;

            if (Passwords.Count > 0)
            {
                tempFileName =
                    Path.Combine(Path.GetDirectoryName(pathToFile),
                                 Path.GetFileNameWithoutExtension(pathToFile) + "_decrypted.pdf");
                if (Decryptor.DecryptFile(pathToFile, tempFileName, Passwords))
                {
                    pathToFile = tempFileName;
                }
            }
            using (var sourceStream = File.OpenRead(pathToFile))
            {
                var settings = new MagickReadSettings
                {
                    Density = new PointD(createPdfImageTaskParams.Dpi, createPdfImageTaskParams.Dpi)
                };
                settings.FrameIndex = 0; // First page
                settings.FrameCount = 1; // Number of pages
                MagickFormat imageFormat = TranslateFormat(createPdfImageTaskParams.Format);

                Logger.DebugFormat("Image format is {0}", imageFormat.ToString());
                using (var images = new MagickImageCollection())
                {
                    bool done = false;
                    if (!_firstDone)
                    {
                        lock (LockForInitializationIssue)
                        {
                            if (!_firstDone)
                            {
                                images.Read(sourceStream, settings);
                                done = true;
                            }
                        }
                    }

                    if (!done)
                    {
                        images.Read(sourceStream, settings);
                    }

                    var lastImage =
                        Math.Min(createPdfImageTaskParams.FromPage - 1 + createPdfImageTaskParams.Pages, images.Count) -
                        1;
                    for (int page = createPdfImageTaskParams.FromPage - 1; page <= lastImage; page++)
                    {
                        var image = images[page];
                        image.Format = imageFormat;

                        using (var ms = new MemoryStream())
                        {
                            image.Write(ms);
                            ms.Seek(0L, SeekOrigin.Begin);
                            await pageWriter(page + 1, ms).ConfigureAwait(false);
                        }
                    }
                }
            }
            if (!String.IsNullOrEmpty(tempFileName) && File.Exists(tempFileName))
            {
                File.Delete(tempFileName);
            }
            return(true);
        }
示例#10
0
        protected async override Task <ProcessResult> OnPolling(
            PollerJobParameters parameters,
            String workingFolder)
        {
            Boolean result;
            var     contentFileName = Path.ChangeExtension(parameters.FileName, ".content");

            if (!_formats.Contains(parameters.FileExtension))
            {
                Logger.DebugFormat("Document for job Id {0} has an extension not supported, setting null content", parameters.JobId);
                return(new ProcessResult(await AddNullContentFormat(parameters, contentFileName)));
            }

            Logger.DebugFormat("Starting tika on job: {0}, file extension {1}", parameters.JobId, parameters.FileExtension);

            Logger.DebugFormat("Downloading blob for job: {0}, on local path {1}", parameters.JobId, workingFolder);
            string pathToFile = await DownloadBlob(parameters.TenantId, parameters.JobId, parameters.FileName, workingFolder);

            pathToFile = ProcessFile(pathToFile, workingFolder);

            Boolean shouldAnalyze = _filterManager.ShouldAnalyze(parameters.FileName, pathToFile);

            if (!shouldAnalyze)
            {
                Logger.InfoFormat("File {0} for job {1} was discharded!", parameters.FileName, parameters.JobId);
                return(new ProcessResult(await AddNullContentFormat(parameters, contentFileName)));
            }
            Logger.DebugFormat("Search for password JobId:{0}", parameters.JobId);
            var     passwords       = ClientPasswordSet.GetPasswordFor(parameters.FileName).ToArray();
            String  content         = "";
            Int32   analyzerOrdinal = 0;
            Boolean success         = false;

            var analyzer = BuildAnalyzer(analyzerOrdinal);

            do
            {
                try
                {
                    if (passwords.Any())
                    {
                        //Try with all the password
                        foreach (var password in passwords)
                        {
                            try
                            {
                                content = analyzer.GetHtmlContent(pathToFile, password) ?? "";
                                break; //first password that can decrypt file break the list of password to try
                            }
                            catch (Exception)
                            {
                                Logger.ErrorFormat("Error opening file {0} with password", parameters.FileName);
                            }
                        }
                    }
                    else
                    {
                        //Simply analyze file without password
                        Logger.DebugFormat("Analyze content JobId: {0} -> Path: {1}", parameters.JobId, pathToFile);
                        content = analyzer.GetHtmlContent(pathToFile, "") ?? "";
                    }
                    success = true;
                }
                catch (Exception ex)
                {
                    Logger.ErrorFormat(ex, "Error extracting tika with analyzer {0} on file {1}", analyzer.Describe(), parameters.FileName, parameters.JobId);
                    analyzer = BuildAnalyzer(++analyzerOrdinal);
                    if (analyzer != null)
                    {
                        Logger.InfoFormat("Retry job  {0} with analyzer {1}", parameters.JobId, analyzer.Describe());
                    }
                }
            } while (analyzer != null && success == false);

            Logger.DebugFormat("Finished tika on job: {0}, charsNum {1}", parameters.JobId, content.Count());
            String sanitizedContent = content;

            if (!string.IsNullOrWhiteSpace(content))
            {
                var resultContent   = _builder.CreateFromTikaPlain(content);
                var documentContent = resultContent.Content;
                sanitizedContent = resultContent.SanitizedTikaContent;
                var    pages = documentContent.Pages.Count();
                string lang  = null;
                if (pages > 1)
                {
                    lang = LanguageDetector.GetLanguage(documentContent.Pages[1].Content);
                }

                if (lang == null && pages == 1)
                {
                    lang = LanguageDetector.GetLanguage(documentContent.Pages[0].Content);
                }

                if (lang != null)
                {
                    documentContent.AddMetadata(DocumentContent.MedatataLanguage, lang);
                }

                result = await AddFormatToDocumentFromObject(
                    parameters.TenantId,
                    this.QueueName,
                    parameters.JobId,
                    new DocumentFormat(DocumentFormats.Content),
                    documentContent,
                    contentFileName,
                    new Dictionary <string, object>());

                Logger.DebugFormat("Added format {0} to jobId {1}, result: {2}", DocumentFormats.Content, parameters.JobId, result);
            }

            var tikaFileName = Path.Combine(workingFolder, Path.GetFileNameWithoutExtension(parameters.FileName) + ".tika.html");

            tikaFileName = SanitizeFileNameForLength(tikaFileName);
            File.WriteAllText(tikaFileName, sanitizedContent);
            result = await AddFormatToDocumentFromFile(
                parameters.TenantId,
                parameters.JobId,
                new DocumentFormat(DocumentFormats.Tika),
                tikaFileName,
                new Dictionary <string, object>());

            Logger.DebugFormat("Added format {0} to jobId {1}, result: {2}", DocumentFormats.Tika, parameters.JobId, result);

            return(ProcessResult.Ok);
        }