/// <summary>
/// Saves every item of the collection into <paramref name="folder"/>, then saves each sub-collection
/// into a child folder named after that sub-collection.
/// </summary>
/// <param name="sources">Collection whose items and sub-collections are written.</param>
/// <param name="folder">Destination folder for the items of <paramref name="sources"/>.</param>
/// <param name="filename">
/// Name passed to each item's <c>Save</c> call. When null or empty, the zero-based position of the item
/// in <c>sources.items</c> is used instead.
/// </param>
/// <param name="deleteExisting">
/// When <c>true</c>, <c>deleteFiles()</c> is called on the destination folder before saving. The same
/// setting is applied to the folders of all sub-collections.
/// </param>
public static void Save(this HtmlSourceAndUrlCollection sources, folderNode folder, String filename = "htmlsource", Boolean deleteExisting = true)
{
    if (deleteExisting)
    {
        folder.deleteFiles();
    }

    if (filename.isNullOrEmpty())
    {
        // No common name given: use the running index so each item receives a distinct name.
        Int32 c = 0;
        foreach (var s in sources.items)
        {
            s.Save(folder, c.ToString());
            c++;
        }
    }
    else
    {
        // NOTE(review): every item is saved with the same filename here; whether the item-level
        // Save makes the resulting paths unique is not visible from this method - confirm.
        foreach (var s in sources.items)
        {
            s.Save(folder, filename);
        }
    }

    foreach (var sb in sources.SubCollections)
    {
        var f = folder.Add(sb.name, sb.name, "HTML sources subcollection of " + sources.name + ".");

        // Forward deleteExisting: previously the recursive call fell back to the default (true),
        // so sub-collection folders were wiped even when the caller passed deleteExisting = false.
        sb.Save(f, filename, deleteExisting);
    }
}
/// <summary>
/// Loads the collection stored directly in <paramref name="folder"/> and adds one sub-collection for every
/// subdirectory (at any depth) that contains at least one file matching <c>filename + "*.html"</c>.
/// </summary>
/// <param name="folder">Root folder to load from.</param>
/// <param name="filename">File name prefix used to locate the stored sources.</param>
/// <param name="removeIncompleteEntries">Forwarded to <c>LoadAll</c> for the root folder and every subdirectory.</param>
/// <param name="removeEmptyDocuments">Forwarded to <c>LoadAll</c> for the root folder and every subdirectory.</param>
/// <returns>
/// The collection loaded from <paramref name="folder"/>, with the collections loaded from the discovered
/// subdirectories appended to its <c>SubCollections</c> (flat, in order of discovery).
/// </returns>
public static HtmlSourceAndUrlCollection LoadAllInSubfolders(this folderNode folder, String filename = "htmlsource", Boolean removeIncompleteEntries = true, Boolean removeEmptyDocuments = true)
{
    // removeEmptyDocuments must be passed by name: the fourth positional parameter of LoadAll is
    // setLocalFilepath, so a positional call silently toggled path rewriting instead of empty-document removal.
    HtmlSourceAndUrlCollection output = LoadAll(folder, filename, removeIncompleteEntries, removeEmptyDocuments: removeEmptyDocuments);

    DirectoryInfo directory = folder;
    var allFiles = directory.GetFiles(filename + "*.html", SearchOption.AllDirectories);

    // Collect each distinct directory (other than the root) that holds a matching file, in discovery order.
    List<DirectoryInfo> subdirectories = new List<DirectoryInfo>();
    HashSet<String> seenDirectories = new HashSet<String>();

    foreach (FileInfo fi in allFiles)
    {
        if (fi.Directory.FullName == directory.FullName)
        {
            // Files of the root folder were already loaded into output above.
            continue;
        }

        if (seenDirectories.Add(fi.DirectoryName))
        {
            subdirectories.Add(fi.Directory);
        }
    }

    foreach (folderNode subfolder in subdirectories)
    {
        output.SubCollections.Add(LoadAll(subfolder, filename, removeIncompleteEntries, removeEmptyDocuments: removeEmptyDocuments));
    }

    return output;
}
/// <summary>
/// Builds a collection from the files found directly in <paramref name="folder"/> whose names match
/// <c>filename + "*.*"</c>. Files sharing the same name without extension are loaded into the same entry.
/// </summary>
/// <param name="folder">Folder to scan (top directory only).</param>
/// <param name="filename">File name prefix of the files to load.</param>
/// <param name="removeIncompleteEntries">When <c>true</c>, only entries whose <c>IsComplete</c> is <c>true</c> are added to the result.</param>
/// <param name="setLocalFilepath">When <c>true</c>, the folder path prefix is stripped from the <c>filepath</c> of every returned item.</param>
/// <param name="removeEmptyDocuments">When <c>true</c>, items whose <c>html</c> is null or empty are removed from the result.</param>
/// <returns>The collection of loaded entries, with its source info set to <paramref name="folder"/>.</returns>
public static HtmlSourceAndUrlCollection LoadAll(this folderNode folder, String filename = "htmlsource", Boolean removeIncompleteEntries = true, Boolean setLocalFilepath = false, Boolean removeEmptyDocuments = true)
{
    var matchingFiles = folder.findFiles(filename + "*.*", SearchOption.TopDirectoryOnly);

    Dictionary<String, HtmlSourceAndUrl> entriesByName = new Dictionary<String, HtmlSourceAndUrl>();

    HtmlSourceAndUrlCollection result = new HtmlSourceAndUrlCollection();
    result.SetSourceInfo(folder, SearchOption.TopDirectoryOnly);

    // Group files by their name without extension; each group feeds a single entry.
    foreach (String filepath in matchingFiles)
    {
        String key = Path.GetFileNameWithoutExtension(filepath);

        HtmlSourceAndUrl entry;
        if (!entriesByName.TryGetValue(key, out entry))
        {
            entry = new HtmlSourceAndUrl();
            entriesByName.Add(key, entry);
        }

        Load(filepath, entry);
    }

    // Entries that ended up without a URL get one detected from their HTML content.
    foreach (HtmlSourceAndUrl entry in entriesByName.Values)
    {
        if (entry.url.isNullOrEmpty())
        {
            entry.url = DetectOriginURL(entry.html);
        }
    }

    if (!removeIncompleteEntries)
    {
        result.items.AddRange(entriesByName.Values);
    }
    else
    {
        foreach (HtmlSourceAndUrl entry in entriesByName.Values)
        {
            if (entry.IsComplete)
            {
                result.items.Add(entry);
            }
        }
    }

    if (setLocalFilepath)
    {
        foreach (var item in result.items)
        {
            item.filepath = item.filepath.removeStartsWith(folder.path);
        }
    }

    if (removeEmptyDocuments)
    {
        // Materialize the list of empty documents first so the collection is not modified while enumerated.
        var emptyDocuments = result.items.Where(item => item.html.isNullOrEmpty()).ToList();
        foreach (var emptyDocument in emptyDocuments)
        {
            result.items.Remove(emptyDocument);
        }
    }

    return result;
}