/// <summary>
/// Downloads every archive in <paramref name="missing"/> into DownloadFolder.
/// Manual-download archives are processed sequentially first; all others are
/// downloaded in parallel via PMap. When <paramref name="download"/> is false,
/// DownloadArchive is still invoked (e.g. for validation) but nothing is fetched.
/// </summary>
public async Task DownloadMissingArchives(List <Archive> missing, bool download = true)
{
    if (download)
    {
        var metricsTask = SendDownloadMetrics(missing);
        // Manual downloads may need user interaction, so run them one at a time.
        foreach (var manual in missing.Where(m => m.State.GetType() == typeof(ManualDownloader.State)))
        {
            var destination = DownloadFolder.Combine(manual.Name);
            await manual.State.Download(manual, destination);
        }
    }

    await missing.Where(m => m.State.GetType() != typeof(ManualDownloader.State))
        .PMap(Queue, UpdateTracker, async archive =>
        {
            Info($"Downloading {archive.Name}");
            var destination = DownloadFolder.Combine(archive.Name);

            // Name collision with an existing file: build a unique name from the
            // download state's primary key and clear any stale file at that path.
            if (download && destination.Exists)
            {
                var baseName = Path.GetFileNameWithoutExtension(archive.Name);
                var extension = Path.GetExtension(archive.Name);
                var uniqueKey = archive.State.PrimaryKeyString.StringSha256Hex();
                destination = DownloadFolder.Combine(baseName + "_" + uniqueKey + "_" + extension);
                await destination.DeleteAsync();
            }

            return await DownloadArchive(archive, download, destination);
        });
}
/// <summary>
/// Converts a PDF to plain text by running the external pdftotext tool and
/// returns the path of the generated .txt file in the dnGREP-PDF temp folder.
/// </summary>
/// <param name="pdfFilePath">Full path of the PDF to convert.</param>
/// <returns>Path of the temporary text file produced by pdftotext.</returns>
/// <exception cref="Exception">Thrown when pdftotext exits with a non-zero code.</exception>
private string ExtractText(string pdfFilePath)
{
    string tempFolder = Path.Combine(Utils.GetTempFolder(), "dnGREP-PDF");
    if (!Directory.Exists(tempFolder))
    {
        Directory.CreateDirectory(tempFolder);
    }
    string tempFileName = Path.Combine(tempFolder, Path.GetFileNameWithoutExtension(pdfFilePath) + ".txt");

    // Use the extended-length path prefix for inputs beyond the legacy MAX_PATH limit.
    if (pdfFilePath.Length > 260 && !pdfFilePath.StartsWith(@"\\?\"))
    {
        pdfFilePath = @"\\?\" + pdfFilePath;
    }

    string options = GrepSettings.Instance.Get <string>(GrepSettings.Key.PdfToTextOptions);

    using (Process process = new Process())
    {
        // Run pdftotext directly (no shell) with user-configured options.
        process.StartInfo.FileName = pathToPdfToText;
        process.StartInfo.Arguments = $"{options} \"{pdfFilePath}\" \"{tempFileName}\"";
        process.StartInfo.UseShellExecute = false;
        process.StartInfo.WorkingDirectory = Utils.GetCurrentPath(typeof(GrepEnginePdf));
        process.StartInfo.CreateNoWindow = true;

        process.Start();
        process.WaitForExit();

        if (process.ExitCode == 0)
        {
            return tempFileName;
        }

        // Map pdftotext exit codes to human-readable messages.
        string errorMessage = process.ExitCode switch
        {
            1 => "Error opening PDF file",
            2 => "Error opening an output file",
            3 => "Error related to PDF permissions",
            _ => "Unknown error",
        };
        throw new Exception($"pdftotext returned '{errorMessage}' converting '{pdfFilePath}'");
    }
}
/// <summary>
/// Starts a streaming upload session: derives an opaque hex key from the file's
/// base name, a fresh GUID, and its extension, creates an empty placeholder file
/// under public/files, and returns the key to the client for subsequent chunks.
/// </summary>
/// <param name="Name">Client-supplied file name; only its base name and extension are used.</param>
/// <returns>200 OK whose body is the ingest key.</returns>
public async Task <IActionResult> UploadFileStreaming(string Name)
{
    var guid = Guid.NewGuid();
    // Key layout before hex-encoding: "<basename>|<guid>|<extension>".
    // ({guid} in an interpolated string already calls ToString().)
    var key = Encoding.UTF8.GetBytes($"{Path.GetFileNameWithoutExtension(Name)}|{guid}|{Path.GetExtension(Name)}").ToHex();
    // Create (and immediately close) the empty placeholder the chunks will target.
    // NOTE(review): method has no awaits; it is async only to match the controller signature.
    System.IO.File.Create(Path.Combine("public", "files", key)).Close();
    Utils.Log($"Starting Ingest for {key}");
    return Ok(key);
}
/// <summary>
/// Starts a streaming upload session: builds an opaque hex key from the file's
/// base name, a fresh GUID, and its extension, registers a per-key write lock,
/// creates an empty temp file for the incoming chunks, and returns the key.
/// </summary>
/// <param name="Name">Client-supplied file name; only its base name and extension are used.</param>
/// <returns>200 OK whose body is the ingest key.</returns>
public async Task <IActionResult> UploadFileStreaming(string Name)
{
    var uploadId = Guid.NewGuid();
    var rawKey = $"{Path.GetFileNameWithoutExtension(Name)}|{uploadId.ToString()}|{Path.GetExtension(Name)}";
    var key = Encoding.UTF8.GetBytes(rawKey).ToHex();

    // Register a lock for this key so later chunk writes can serialize on it.
    _writeLocks.GetOrAdd(key, new AsyncLock());

    // Create the empty temp file the chunks will be appended to; dispose right away.
    await using var fs = _settings.TempPath.Combine(key).Create();

    Utils.Log($"Starting Ingest for {key}");
    return Ok(key);
}
/// <summary>
/// Builds a full output path for <paramref name="basename"/> + <paramref name="typeIdStr"/>
/// that stays under the Windows path-length limit. Several progressively longer
/// candidate names are generated (shorthand data name, shorthand test name,
/// MD5-derived disambiguator) and the longest candidate that still fits is chosen.
/// </summary>
/// <param name="basename">Path whose directory and file name seed the output name.</param>
/// <param name="typeIdStr">Type marker (e.g. approved/received) inserted before the final extension.</param>
/// <returns>A full path guaranteed (by construction of alt 0) to fit within PATH_MAX.</returns>
private string MkLegalSizedPath(string basename, string typeIdStr)
{
    const int PATH_MAX = 240;       // must be less than 255 / 260 - see also https://kb.acronis.com/content/39790

    string root = Path.GetDirectoryName(basename);
    string name = Path.GetFileName(basename);
    string dataname = Path.GetFileNameWithoutExtension(DataFile);
    string ext = SubStr(Path.GetExtension(DataFile), 1).Trim();     // produce the extension without leading dot
    // Strip a leading "bib" marker from the extension, if present.
    if (ext.StartsWith("bib"))
    {
        ext = SubStr(ext, 3).Trim();
    }
    if (ext.Length > 0)
    {
        ext = "." + ext;
    }
    // UNC long filename/path support by forcing this to be a UNC path:
    string filenamebase = $"{dataname}.{name}{ext}{ExtensionWithDot}";
    // first make the full path without the approved/received, so that that bit doesn't make a difference
    // in the length check and subsequent decision to produce a shorthand filename path or not:
    // It's not always needed, but do the different shorthand conversions anyway and pick the longest fitting one:
    string short_tn = SanitizeFilename(CamelCaseShorthand(name));
    string short_dn = SanitizeFilename(SubStr(dataname, 0, 10) + CamelCaseShorthand(dataname));
    string hash = StreamMD5.FromText(filenamebase).ToUpper();
    // Hash fragment is at least 6 chars; longer when the shorthand test name is short.
    string short_hash = SubStr(hash, 0, Math.Max(6, 11 - short_tn.Length));
    // this variant will fit in the length criterium, guaranteed:
    string alt_filepath0 = Path.GetFullPath(Path.Combine(root, $"{short_dn}.{short_hash}_{short_tn}{ext}{typeIdStr}{ExtensionWithDot}"));
    string filepath = alt_filepath0;
    // next, we construct the longer variants to check if they fit.
    //
    // DO NOTE that we create a path without typeIdStr part first, because we want both received and approved files to be based
    // on the *same* alt selection decision!
    // ".APPROVEDXYZ" is a worst-case-length stand-in for typeIdStr used only for the length check.
    string picked_alt_filepath = Path.GetFullPath(Path.Combine(root, $"{short_dn}.{short_hash}_{short_tn}{ext}.APPROVEDXYZ{ExtensionWithDot}"));

    name = SanitizeFilename(name);
    dataname = SanitizeFilename(dataname);

    // first alternative: shorthand data name + full test name.
    string alt_filepath1 = Path.GetFullPath(Path.Combine(root, $"{short_dn}_{short_hash}.{name}{ext}.APPROVEDXYZ{ExtensionWithDot}"));
    if (alt_filepath1.Length < PATH_MAX)
    {
        filepath = Path.GetFullPath(Path.Combine(root, $"{short_dn}_{short_hash}.{name}{ext}{typeIdStr}{ExtensionWithDot}"));
        picked_alt_filepath = alt_filepath1;
    }
    // second alternative: only pick this one if it fits and produces a longer name:
    string alt_filepath2 = Path.GetFullPath(Path.Combine(root, $"{dataname}.{short_hash}_{short_tn}{ext}.APPROVEDXYZ{ExtensionWithDot}"));
    if (alt_filepath2.Length < PATH_MAX && alt_filepath2.Length > picked_alt_filepath.Length)
    {
        filepath = Path.GetFullPath(Path.Combine(root, $"{dataname}.{short_hash}_{short_tn}{ext}{typeIdStr}{ExtensionWithDot}"));
        picked_alt_filepath = alt_filepath2;
    }
    else
    {
        // third alt: the 'optimally trimmed' test name used as part of the filename:
        int trim_length = PATH_MAX - alt_filepath0.Length + 10 - 1;
        string short_dn2 = SanitizeFilename(SubStr(dataname, 0, trim_length) + CamelCaseShorthand(dataname));
        string alt_filepath3 = Path.GetFullPath(Path.Combine(root, $"{short_dn2}.{short_hash}_{short_tn}{ext}{typeIdStr}{ExtensionWithDot}"));
        if (alt_filepath3.Length < PATH_MAX && alt_filepath3.Length > picked_alt_filepath.Length)
        {
            filepath = Path.GetFullPath(Path.Combine(root, $"{short_dn2}.{short_hash}_{short_tn}{ext}{typeIdStr}{ExtensionWithDot}"));
            picked_alt_filepath = alt_filepath3;
        }
    }
    // fourth alt: the full, unadulterated path; if it fits in the length criterium, take it anyway
    string alt_filepath4 = Path.GetFullPath(Path.Combine(root, $"{dataname}.{name}{ext}.APPROVEDXYZ{ExtensionWithDot}"));
    if (alt_filepath4.Length < PATH_MAX)
    {
        // UNC long filename/path support by forcing this to be a UNC path:
        filepath = Path.GetFullPath(Path.Combine(root, $"{dataname}.{name}{ext}{typeIdStr}{ExtensionWithDot}"));
        picked_alt_filepath = alt_filepath4;
    }
    return(filepath);
}
/// <summary>
/// Lazily loads all *.plugin descriptors from the Plugins folder (once, guarded by
/// lockObj): enabled, framework-compatible plugins are registered in
/// <c>plugins</c>, <c>poolKeys</c> and <c>fileTypeEngines</c>; disabled ones go to
/// <c>disabledPlugins</c>. A failure to load one plugin is logged and does not
/// stop the others from loading.
/// </summary>
private static void LoadPlugins()
{
    lock (lockObj)
    {
        // Null 'plugins' means this is the first call; subsequent calls are no-ops.
        if (plugins == null)
        {
            plugins = new List <GrepPlugin>();
            disabledPlugins.Clear();
            string pluginPath = Path.Combine(Utils.GetCurrentPath(), "Plugins");

            if (Directory.Exists(pluginPath))
            {
                foreach (string pluginFile in Directory.GetFiles(pluginPath, "*.plugin", SearchOption.AllDirectories))
                {
                    try
                    {
                        GrepPlugin plugin = new GrepPlugin(pluginFile);
                        if (plugin.LoadPluginSettings())
                        {
                            if (FrameworkVersionsAreCompatible(plugin.FrameworkVersion, FrameworkVersion))
                            {
                                if (plugin.Enabled)
                                {
                                    plugins.Add(plugin);

                                    // many file extensions will map to the same pool of engines,
                                    // so keep a common key for the set of extensions
                                    foreach (string ext in plugin.Extensions)
                                    {
                                        string fileExtension = ext.TrimStart('.');
                                        // First plugin claiming an extension wins the pool key.
                                        if (!poolKeys.ContainsKey(fileExtension))
                                        {
                                            poolKeys.Add(fileExtension, plugin.PluginName);
                                        }
                                    }

                                    logger.Debug(string.Format("Loading plugin: {0} for extensions {1}", plugin.DllFilePath, string.Join(", ", plugin.Extensions.ToArray())));
                                }
                                else
                                {
                                    disabledPlugins.Add(plugin);
                                    logger.Debug(string.Format("Plugin skipped, not enabled: {0}", plugin.DllFilePath));
                                }
                            }
                            else
                            {
                                logger.Error(string.Format("Plugin '{0}' developed under outdated framework. Please update the plugin.", Path.GetFileNameWithoutExtension(pluginFile)));
                            }
                        }
                        else
                        {
                            logger.Error(string.Format("Plugin {0} failed to load", plugin.DllFilePath));
                        }
                    }
                    catch (Exception ex)
                    {
                        // One bad plugin must not prevent the rest from loading.
                        logger.Log <Exception>(LogLevel.Error, "Failed to initialize " + Path.GetFileNameWithoutExtension(pluginFile) + " engine.", ex);
                    }
                }
            }

            // Build the extension -> plugin map; first registered plugin per extension wins.
            foreach (GrepPlugin plugin in plugins)
            {
                foreach (string extension in plugin.Extensions)
                {
                    if (extension != null)
                    {
                        string fileExtension = extension.TrimStart('.');
                        if (!string.IsNullOrWhiteSpace(fileExtension) && !fileTypeEngines.ContainsKey(fileExtension))
                        {
                            fileTypeEngines.Add(fileExtension, plugin);
                        }
                    }
                }
            }
        }
    }
}
/// <summary>
/// Enumerates the entries of an archive stream (recursing into nested archives)
/// and returns the FileData records that pass the filter. Hidden directories and
/// their direct children are skipped when hidden files are excluded; binary
/// detection is performed by extracting each entry to memory when binaries are
/// excluded. Errors are logged and an empty/partial list is returned.
/// </summary>
/// <param name="input">Seekable stream containing the archive contents.</param>
/// <param name="file">Display path of the archive (nested entries get ArchiveSeparator appended).</param>
/// <param name="includeSearchPatterns">Wildcard patterns; null/empty means include everything.</param>
/// <param name="filter">Filter options; must not be null.</param>
public static IList <FileData> EnumerateFiles(Stream input, string file, IList <string> includeSearchPatterns, FileFilter filter)
{
    if (filter == null)
    {
        throw new ArgumentNullException(nameof(filter));
    }

    List <FileData> results = new List <FileData>();
    HashSet <string> hiddenDirectories = new HashSet <string>();
    try
    {
        using (SevenZipExtractor extractor = new SevenZipExtractor(input))
        {
            foreach (var fileInfo in extractor.ArchiveFileData)
            {
                FileData fileData = new FileData(file, fileInfo);
                var attr = (FileAttributes)fileInfo.Attributes;
                string innerFileName = fileInfo.FileName;
                int index = fileInfo.Index;

                // Single unnamed entry (e.g. gz of one file): synthesize a name from the archive name.
                if (innerFileName == "[no name]" && extractor.ArchiveFileData.Count == 1)
                {
                    index = 0;
                    innerFileName = Path.GetFileNameWithoutExtension(file);
                    ArchiveFileInfo temp = Copy(fileInfo);
                    temp.FileName = innerFileName;
                    fileData = new FileData(file, temp);
                }

                if (fileInfo.IsDirectory)
                {
                    // Remember hidden directories so their files can be skipped below.
                    if (!filter.IncludeHidden && attr.HasFlag(FileAttributes.Hidden) && !hiddenDirectories.Contains(innerFileName))
                    {
                        hiddenDirectories.Add(innerFileName);
                    }
                    continue;
                }

                if (!filter.IncludeHidden)
                {
                    // NOTE(review): this only checks the immediate parent directory;
                    // entries nested deeper under a hidden directory may slip through — confirm intent.
                    string path = Path.GetDirectoryName(innerFileName);
                    if (hiddenDirectories.Contains(path))
                    {
                        continue;
                    }
                }

                if (!filter.IncludeBinary)
                {
                    // Extract to memory just to sniff whether the entry is binary.
                    using (Stream stream = new MemoryStream())
                    {
                        extractor.ExtractFile(index, stream);
                        stream.Seek(0, SeekOrigin.Begin);
                        fileData.IsBinary = Utils.IsBinary(stream);
                    }
                }

                if (includeSearchPatterns != null && includeSearchPatterns.Count > 0)
                {
                    // NOTE(review): no break after a match — an entry matching several
                    // patterns is added once per matching pattern; verify that is intended.
                    foreach (string pattern in includeSearchPatterns)
                    {
                        if (SafeDirectory.WildcardMatch(innerFileName, pattern, true))
                        {
                            results.Add(fileData);
                        }
                    }
                }
                else
                {
                    results.Add(fileData);
                }

                // Recurse into nested archives, extending the display path.
                if (Utils.IsArchive(innerFileName))
                {
                    using (Stream stream = new MemoryStream())
                    {
                        extractor.ExtractFile(index, stream);
                        foreach (var item in EnumerateFiles(stream, file + ArchiveSeparator + innerFileName, includeSearchPatterns, filter))
                        {
                            results.Add(item);
                        }
                    }
                }

                if (Utils.CancelSearch)
                {
                    break;
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: a corrupt archive yields whatever was gathered so far.
        logger.Error(ex, string.Format(CultureInfo.CurrentCulture, "Failed to search inside archive '{0}'", file));
    }
    return(results);
}
/// <summary>
/// Migrates Qiqqa libraries from format 037 to 038: for every library directory
/// that has a documents folder but no 038 database (and is not an intranet sync
/// folder), copies each recognized document file into the new SQLite database as
/// a blob, inside a single transaction per library. Progress is reported through
/// StatusManager.
/// </summary>
internal static void RunUpgrade()
{
    Logging.Info("Upgrading from 037 to 038");
    string base_directory_path = BaseDirectoryForQiqqa;
    if (Directory.Exists(base_directory_path))
    {
        int info_library_count, info_item_count;
        string[] library_directories = Directory.GetDirectories(base_directory_path);
        info_library_count = 0;
        foreach (string library_directory in library_directories)
        {
            ++info_library_count;
            Logging.Info("Inspecting directory {0}", library_directory);
            string documents_directory = Path.GetFullPath(Path.Combine(library_directory, @"documents"));
            string database_file = LibraryDB.GetLibraryDBPath(library_directory);
            string database_syncref_file = IntranetLibraryTools.GetLibraryMetadataPath(library_directory);

            // make sure we skip S3DB internet DB sync directories and only 'go through the upgrade process
            // when this looks like a viable (local) Qiqqa library:
            if (!File.Exists(database_file) && Directory.Exists(documents_directory) && !File.Exists(database_syncref_file))
            {
                Logging.Warn("We have to upgrade {0}", library_directory);
                SQLiteUpgrade_LibraryDB library_db = new SQLiteUpgrade_LibraryDB(library_directory);
                using (var connection = library_db.GetConnection())
                {
                    connection.Open();
                    // One transaction per library: all blobs land atomically.
                    using (var transaction = connection.BeginTransaction())
                    {
                        // Get a list of ALL the files in the documents directory...
                        string[] full_filenames = Directory.GetFiles(documents_directory, "*.*", SearchOption.AllDirectories);
                        info_item_count = 0;
                        foreach (string full_filename in full_filenames)
                        {
                            ++info_item_count;
                            StatusManager.Instance.UpdateStatus("DBUpgrade", String.Format("Upgrading library {0}/{1}", info_library_count, library_directories.Length), info_item_count, full_filenames.Length);

                            // File name (sans extension) is the document fingerprint key.
                            string fingerprint = Path.GetFileNameWithoutExtension(full_filename);
                            string extension = Path.GetExtension(full_filename).Trim('.');
                            if (EXTENSIONS.Contains(extension))
                            {
                                Logging.Info("Upgrading {0}--{1}", fingerprint, extension);
                                byte[] data = File.ReadAllBytes(full_filename);
                                library_db.PutBlob(connection, transaction, fingerprint, extension, data);
                            }
                            else
                            {
                                Logging.Info("NOT upgrading {0}--{1}", fingerprint, extension);
                            }
                        }
                        transaction.Commit();
                    }
                }
            }
        }
    }
    StatusManager.Instance.UpdateStatus("DBUpgrade", "Finished migrating libraries.");
}
/// <summary>
/// Migrates Qiqqa libraries from format 037 to 038: for each library directory
/// that has a documents folder but no Qiqqa.library database yet, imports every
/// recognized document file into the new SQLite database as a blob, inside one
/// transaction per library. Progress is shown on the splash screen.
/// </summary>
/// <param name="splashscreen_window">Splash screen used for progress messages.</param>
internal static void RunUpgrade(SplashScreenWindow splashscreen_window)
{
    Logging.Info("Upgrading from 037 to 038");
    string baseDir = BaseDirectoryForQiqqa;
    if (Directory.Exists(baseDir))
    {
        string[] libraryDirs = Directory.GetDirectories(baseDir);
        for (int libraryIndex = 0; libraryIndex < libraryDirs.Length; ++libraryIndex)
        {
            string libraryDir = libraryDirs[libraryIndex];
            Logging.Info("Inspecting directory {0}", libraryDir);

            string documentsDir = Path.GetFullPath(Path.Combine(libraryDir, @"documents"));
            string databaseFile = Path.GetFullPath(Path.Combine(libraryDir, @"Qiqqa.library"));

            // Only migrate libraries with documents but no new-format database yet.
            if (File.Exists(databaseFile) || !Directory.Exists(documentsDir))
            {
                continue;
            }

            Logging.Warn("We have to upgrade {0}", libraryDir);
            SQLiteUpgrade_LibraryDB libraryDb = new SQLiteUpgrade_LibraryDB(libraryDir);
            using (var connection = libraryDb.GetConnection())
            {
                connection.Open();
                // One transaction per library so the import is atomic.
                using (var transaction = connection.BeginTransaction())
                {
                    // Get a list of ALL the files in the documents directory...
                    string[] filenames = Directory.GetFiles(documentsDir, "*.*", SearchOption.AllDirectories);
                    for (int itemIndex = 0; itemIndex < filenames.Length; ++itemIndex)
                    {
                        string filename = filenames[itemIndex];
                        splashscreen_window.UpdateMessage("Upgrading library {0}/{1}: {2:P0}", libraryIndex + 1, libraryDirs.Length, (itemIndex + 1) / (double)filenames.Length);

                        // File name (sans extension) is the document fingerprint key.
                        string fingerprint = Path.GetFileNameWithoutExtension(filename);
                        string extension = Path.GetExtension(filename).Trim('.');
                        if (EXTENSIONS.Contains(extension))
                        {
                            Logging.Info("Upgrading {0}--{1}", fingerprint, extension);
                            byte[] data = File.ReadAllBytes(filename);
                            libraryDb.PutBlob(connection, transaction, fingerprint, extension, data);
                        }
                        else
                        {
                            Logging.Info("NOT upgrading {0}--{1}", fingerprint, extension);
                        }
                    }
                    transaction.Commit();
                }
            }
        }
    }
    splashscreen_window.UpdateMessage("Finished migrating libraries.");
}
/// <summary>
/// Lazily enumerates the entries of an archive stream that pass the filters,
/// recursing into nested archives. Hidden entries (and anything under a hidden
/// directory) are skipped when hidden files are excluded. Failures while walking
/// a nested archive are logged and surfaced as a FileData with ErrorMsg set.
/// </summary>
/// <param name="input">Seekable stream with the archive contents.</param>
/// <param name="fileName">Display path of the archive (nested entries get ArchiveSeparator appended).</param>
/// <param name="hiddenDirectories">Accumulates hidden directory prefixes across the walk.</param>
private static IEnumerable <FileData> EnumerateFiles(Stream input, string fileName, FileFilter fileFilter, bool checkEncoding, List <string> includeSearchPatterns, List <Regex> includeRegexPatterns, List <Regex> excludeRegexPatterns, List <Regex> includeShebangPatterns, HashSet <string> hiddenDirectories)
{
    using (SevenZipExtractor extractor = new SevenZipExtractor(input, true))
    {
        foreach (var fileInfo in extractor.ArchiveFileData)
        {
            FileData fileData = new FileData(fileName, fileInfo);
            var attr = (FileAttributes)fileInfo.Attributes;
            string innerFileName = fileInfo.FileName;
            int index = fileInfo.Index;

            // Single unnamed entry (e.g. gz of one file): synthesize a name from the archive name.
            if (innerFileName == "[no name]" && extractor.ArchiveFileData.Count == 1)
            {
                index = 0;
                innerFileName = Path.GetFileNameWithoutExtension(fileName);
                ArchiveFileInfo temp = Copy(fileInfo);
                temp.FileName = innerFileName;
                fileData = new FileData(fileName, temp);
            }

            if (fileInfo.IsDirectory)
            {
                // Record hidden directories (with trailing separator) so files under them are excluded by prefix.
                if (!fileFilter.IncludeHidden && attr.HasFlag(FileAttributes.Hidden) && !hiddenDirectories.Contains(innerFileName))
                {
                    hiddenDirectories.Add(innerFileName + Path.DirectorySeparator);
                }
                continue;
            }

            if (!fileFilter.IncludeHidden)
            {
                if (attr.HasFlag(FileAttributes.Hidden))
                {
                    continue;
                }

                // Skip files living under any previously-seen hidden directory.
                bool excludeFile = false;
                foreach (string dir in hiddenDirectories)
                {
                    if (innerFileName.StartsWith(dir))
                    {
                        excludeFile = true;
                        break;
                    }
                }

                if (excludeFile)
                {
                    continue;
                }
            }

            if (Utils.IsArchive(innerFileName))
            {
                using (Stream stream = new MemoryStream())
                {
                    extractor.ExtractFile(index, stream);

                    // Manual enumerator loop: C# forbids 'yield return' inside a
                    // try/catch, so MoveNext is wrapped to trap nested-archive errors.
                    var enumerator = EnumerateFiles(stream, fileName + ArchiveSeparator + innerFileName, fileFilter, checkEncoding, includeSearchPatterns, includeRegexPatterns, excludeRegexPatterns, includeShebangPatterns, hiddenDirectories).GetEnumerator();
                    while (true)
                    {
                        FileData ret = null;
                        try
                        {
                            if (!enumerator.MoveNext())
                            {
                                break;
                            }
                            ret = enumerator.Current;
                        }
                        catch (Exception ex)
                        {
                            // Surface the failure as a FileData carrying the error message.
                            string msg = string.Format(CultureInfo.CurrentCulture, "Failed to search inside archive '{0}'", fileName + ArchiveSeparator + innerFileName);
                            logger.Error(ex, msg);
                            fileData.ErrorMsg = msg + ": " + ex.Message;
                            ret = fileData;
                        }
                        if (ret != null)
                        {
                            yield return(ret);
                        }
                    }
                }
            }
            else
            {
                if (IncludeFile(innerFileName, fileName + ArchiveSeparator + innerFileName, fileFilter, fileData, includeSearchPatterns, includeRegexPatterns, excludeRegexPatterns))
                {
                    // Only extract the entry when a content-based check (encoding/shebang) is required.
                    if (NeedsIncludeFileStream(fileName, fileFilter, checkEncoding, includeSearchPatterns, includeShebangPatterns))
                    {
                        using (Stream stream = new MemoryStream(4096))
                        {
                            extractor.ExtractFile(index, stream);
                            if (IncludeFileStream(stream, fileFilter, fileData, checkEncoding, includeShebangPatterns))
                            {
                                yield return(fileData);
                            }
                        }
                    }
                    else
                    {
                        yield return(fileData);
                    }
                }
            }

            if (Utils.CancelSearch)
            {
                break;
            }
        }
    }
}
/// <summary>
/// Converts one EndNote MYD record into a FilenameWithMetadataImport: BibTeX
/// metadata from the record's fields, tags from its keywords, notes, and the
/// first PDF attachment that exists on disk (searched first in the database's
/// "*.Data/PDF" folder, then as a raw path).
/// </summary>
/// <param name="endnote_database_filename">Path to the .ENL database (its 4-char extension is stripped to locate attachments).</param>
/// <param name="record">Parsed EndNote record with reference_type and field map.</param>
private static FilenameWithMetadataImport ConvertEndnoteToFilenameWithMetadataImport(string endnote_database_filename, MYDRecord record)
{
    BibTexItem bibtex_item = new BibTexItem();

    // Map the EndNote reference type onto a BibTeX entry type (defaults to article).
    string type = "article";
    TransformType(record.reference_type, ref type);
    bibtex_item.Type = type;
    bibtex_item.Key = BibTexTools.GenerateRandomBibTeXKey();

    foreach (var pair in record.fields)
    {
        string key = pair.Key;
        string value = pair.Value;
        TransformKeyValue(record.reference_type, ref key, ref value);

        // notes, keywords and attachments are handled separately below, not as BibTeX fields.
        if ("notes" == key)
        {
            continue;
        }
        if ("keywords" == key)
        {
            continue;
        }
        if ("link_to_pdf" == key)
        {
            continue;
        }

        bibtex_item[key] = value;
    }

    FilenameWithMetadataImport fwmi = new FilenameWithMetadataImport();
    fwmi.tags.Add("import_endnote");
    fwmi.tags.Add("import_endnote_" + Path.GetFileNameWithoutExtension(endnote_database_filename));
    fwmi.bibtex = bibtex_item.ToBibTex();

    // TryGetValue avoids the ContainsKey + indexer double lookup.
    if (record.fields.TryGetValue("notes", out string notes))
    {
        fwmi.notes = notes;
    }

    if (record.fields.TryGetValue("keywords", out string keywords))
    {
        string[] tags = keywords.Split(new char[] { ' ', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
        foreach (string tag in tags)
        {
            fwmi.tags.Add(tag);
        }
    }

    // Handle the attachments
    if (record.fields.TryGetValue("link_to_pdf", out string links_string))
    {
        string[] links = links_string.Split(new string[] { ",", "internal-pdf://", "\r", "\n" }, StringSplitOptions.RemoveEmptyEntries);

        // Build up the list of candidates
        string base_directory = Path.GetFullPath(endnote_database_filename.Substring(0, endnote_database_filename.Length - 4) + @".Data/PDF");
        List <string> pdf_links = new List <string>();

        // First candidates are those in the subdirectory corresponding to the .ENL file
        foreach (string link in links)
        {
            pdf_links.Add(Path.GetFullPath(Path.Combine(base_directory, link)));
        }
        // Second candidates are raw pathnames
        foreach (string link in links)
        {
            pdf_links.Add(link);
        }

        // Use the first PDF file that exists in the file system.
        // OrdinalIgnoreCase avoids culture-dependent casing bugs (e.g. Turkish 'I')
        // that ToLower().EndsWith(".pdf") is subject to.
        foreach (string pdf_link in pdf_links)
        {
            if (pdf_link.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase) && File.Exists(pdf_link))
            {
                fwmi.filename = pdf_link;
                break;
            }
        }
    }

    return(fwmi);
}
/// <summary>
/// Lazily searches every matching entry inside an archive stream, recursing into
/// nested archives, and yields one result list per searched file. Hidden entries
/// (and anything under a hidden directory) are skipped when hidden files are
/// excluded. Failures inside a nested archive are logged and yielded as a result
/// whose FileData carries the error message.
/// </summary>
/// <param name="input">Seekable stream with the archive contents.</param>
/// <param name="fileName">Display path of the archive (nested entries get ArchiveSeparator appended).</param>
private IEnumerable <List <GrepSearchResult> > SearchInsideArchive(Stream input, string fileName, string searchPattern, SearchType searchType, GrepSearchOption searchOptions, Encoding encoding)
{
    using (SevenZipExtractor extractor = new SevenZipExtractor(input, true))
    {
        foreach (var fileInfo in extractor.ArchiveFileData)
        {
            FileData fileData = new FileData(fileName, fileInfo);
            var attr = (FileAttributes)fileInfo.Attributes;
            string innerFileName = fileInfo.FileName;
            int index = fileInfo.Index;

            // Single unnamed entry (e.g. gz of one file): synthesize a name from the archive name.
            if (innerFileName == "[no name]" && extractor.ArchiveFileData.Count == 1)
            {
                index = 0;
                innerFileName = Path.GetFileNameWithoutExtension(fileName);
                ArchiveFileInfo temp = ArchiveDirectory.Copy(fileInfo);
                temp.FileName = innerFileName;
                fileData = new FileData(fileName, temp);
            }

            if (fileInfo.IsDirectory)
            {
                // Record hidden directories (with trailing separator) so files under them are excluded by prefix.
                if (!fileFilter.IncludeHidden && attr.HasFlag(FileAttributes.Hidden) && !hiddenDirectories.Contains(innerFileName))
                {
                    hiddenDirectories.Add(innerFileName + Path.DirectorySeparator);
                }
                continue;
            }

            if (!fileFilter.IncludeHidden)
            {
                if (attr.HasFlag(FileAttributes.Hidden))
                {
                    continue;
                }

                // Skip files living under any previously-seen hidden directory.
                bool excludeFile = false;
                foreach (string dir in hiddenDirectories)
                {
                    if (innerFileName.StartsWith(dir))
                    {
                        excludeFile = true;
                        break;
                    }
                }

                if (excludeFile)
                {
                    continue;
                }
            }

            if (Utils.IsArchive(innerFileName))
            {
                using (Stream stream = new MemoryStream(4096))
                {
                    extractor.ExtractFile(index, stream);

                    // Manual enumerator loop: C# forbids 'yield return' inside a
                    // try/catch, so MoveNext is wrapped to trap nested-archive errors.
                    var enumerator = SearchInsideArchive(stream, fileName + ArchiveDirectory.ArchiveSeparator + innerFileName, searchPattern, searchType, searchOptions, encoding).GetEnumerator();
                    while (true)
                    {
                        List <GrepSearchResult> ret = null;
                        try
                        {
                            if (!enumerator.MoveNext())
                            {
                                break;
                            }
                            ret = enumerator.Current;
                        }
                        catch (Exception ex)
                        {
                            // Surface the failure as a single result carrying the error message.
                            string msg = string.Format(CultureInfo.CurrentCulture, "Failed to search inside archive '{0}'", fileName + ArchiveDirectory.ArchiveSeparator + innerFileName);
                            logger.Error(ex, msg);
                            fileData.ErrorMsg = msg + ": " + ex.Message;
                            ret = new List <GrepSearchResult> { new GrepSearchResult(fileData, encoding) };
                        }
                        if (ret != null)
                        {
                            yield return(ret);
                        }
                    }
                }
            }
            else
            {
                if (ArchiveDirectory.IncludeFile(innerFileName, fileName + ArchiveDirectory.ArchiveSeparator + innerFileName, fileFilter, fileData, includeSearchPatterns, includeRegexPatterns, excludeRegexPatterns))
                {
                    var res = SearchInnerFile(extractor, index, fileFilter, fileData, fileName + ArchiveDirectory.ArchiveSeparator + innerFileName, searchPattern, searchType, searchOptions, encoding);
                    if (res != null)
                    {
                        yield return(res);
                    }
                }
            }

            if (Utils.CancelSearch)
            {
                break;
            }
        }
    }
}
/// <summary>
/// Returns the file name of <paramref name="path"/> without its extension,
/// delegating the parsing to AfsPath.
/// </summary>
public override string GetFileNameWithoutExtension(string path) => AfsPath.GetFileNameWithoutExtension(path);