Example #1
0
        /// <summary>
        /// Saves every item of the collection into <paramref name="folder"/>, then recursively saves
        /// each sub-collection into a child folder created under <paramref name="folder"/>.
        /// </summary>
        /// <param name="sources">Collection whose items and sub-collections are saved.</param>
        /// <param name="folder">Folder receiving the items of this collection.</param>
        /// <param name="filename">Name passed to each item's Save call. When null or empty, the item's zero-based position in the collection is used instead.</param>
        /// <param name="deleteExisting">When true, <c>deleteFiles()</c> is called on the target folder before saving. The same setting is applied to every sub-collection folder.</param>
        public static void Save(this HtmlSourceAndUrlCollection sources, folderNode folder, String filename = "htmlsource", Boolean deleteExisting = true)
        {
            if (deleteExisting)
            {
                folder.deleteFiles();
            }

            if (filename.isNullOrEmpty())
            {
                // No name supplied: fall back to the running index of the item.
                Int32 c = 0;
                foreach (var s in sources.items)
                {
                    s.Save(folder, c.ToString());
                    c++;
                }
            }
            else
            {
                foreach (var s in sources.items)
                {
                    s.Save(folder, filename);
                }
            }

            foreach (var sb in sources.SubCollections)
            {
                var f = folder.Add(sb.name, sb.name, "HTML sources subcollection of " + sources.name + ".");

                // Forward deleteExisting: previously the recursive call fell back to the default (true),
                // so sub-collection folders were wiped even when the caller asked to keep existing files.
                sb.Save(f, filename, deleteExisting);
            }
        }
Example #2
0
        /// <summary>
        /// Loads the entries found directly in <paramref name="folder"/> and, as sub-collections,
        /// the entries of every nested directory that contains at least one matching HTML file.
        /// </summary>
        /// <param name="folder">Root folder of the search.</param>
        /// <param name="filename">File name prefix; nested directories are discovered with the pattern prefix + "*.html".</param>
        /// <param name="removeIncompleteEntries">Forwarded to <c>LoadAll</c> for the root folder and for each discovered directory.</param>
        /// <param name="removeEmptyDocuments">Forwarded to <c>LoadAll</c> for the root folder and for each discovered directory.</param>
        /// <returns>Collection for the root folder, with one sub-collection per discovered directory.</returns>
        public static HtmlSourceAndUrlCollection LoadAllInSubfolders(this folderNode folder, String filename = "htmlsource", Boolean removeIncompleteEntries = true, Boolean removeEmptyDocuments = true)
        {
            // removeEmptyDocuments is passed by name: LoadAll's 4th positional parameter is setLocalFilepath,
            // so passing it positionally routed the flag to the wrong parameter.
            HtmlSourceAndUrlCollection output = LoadAll(folder, filename, removeIncompleteEntries, removeEmptyDocuments: removeEmptyDocuments);

            DirectoryInfo directory = folder;

            var allFiles = directory.GetFiles(filename + "*.html", SearchOption.AllDirectories);

            // Distinct directories in order of first appearance; the set makes the duplicate check O(1).
            List<DirectoryInfo> subdirectories = new List<DirectoryInfo>();
            HashSet<String> seenDirectories = new HashSet<String>();

            foreach (FileInfo fi in allFiles)
            {
                // Files sitting in the root folder are already covered by the LoadAll call above.
                if (fi.Directory.FullName == directory.FullName)
                {
                    continue;
                }

                if (seenDirectories.Add(fi.DirectoryName))
                {
                    subdirectories.Add(fi.Directory);
                }
            }

            foreach (folderNode subfolder in subdirectories)
            {
                output.SubCollections.Add(LoadAll(subfolder, filename, removeIncompleteEntries, removeEmptyDocuments: removeEmptyDocuments));
            }

            return output;
        }
Example #3
0
        /// <summary>
        /// Loads the entries stored in the top level of <paramref name="folder"/>.
        /// Files that share the same name without extension are loaded into one entry.
        /// </summary>
        /// <param name="folder">Folder that is searched (top directory only).</param>
        /// <param name="filename">File name prefix; the search pattern is the prefix followed by "*.*".</param>
        /// <param name="removeIncompleteEntries">When true, only entries whose <c>IsComplete</c> is true are placed in the result.</param>
        /// <param name="setLocalFilepath">When true, each item's filepath is replaced with the result of <c>removeStartsWith(folder.path)</c>.</param>
        /// <param name="removeEmptyDocuments">When true, items whose html is null or empty are removed from the result.</param>
        /// <returns>The collection of loaded items.</returns>
        public static HtmlSourceAndUrlCollection LoadAll(this folderNode folder, String filename = "htmlsource", Boolean removeIncompleteEntries = true, Boolean setLocalFilepath = false, Boolean removeEmptyDocuments = true)
        {
            var matches = folder.findFiles(filename + "*.*", SearchOption.TopDirectoryOnly);

            var entriesByName = new Dictionary<string, HtmlSourceAndUrl>();
            var result = new HtmlSourceAndUrlCollection();

            result.SetSourceInfo(folder, SearchOption.TopDirectoryOnly);

            // Group files by their extension-less name and load each one into the shared entry.
            foreach (String path in matches)
            {
                String key = Path.GetFileNameWithoutExtension(path);

                HtmlSourceAndUrl entry;
                if (!entriesByName.TryGetValue(key, out entry))
                {
                    entry = new HtmlSourceAndUrl();
                    entriesByName.Add(key, entry);
                }

                Load(path, entry);
            }

            // Entries left without a URL after loading get one detected from their HTML.
            foreach (HtmlSourceAndUrl loaded in entriesByName.Values)
            {
                if (loaded.url.isNullOrEmpty())
                {
                    loaded.url = DetectOriginURL(loaded.html);
                }
            }

            if (!removeIncompleteEntries)
            {
                result.items.AddRange(entriesByName.Values);
            }
            else
            {
                foreach (HtmlSourceAndUrl candidate in entriesByName.Values)
                {
                    if (candidate.IsComplete)
                    {
                        result.items.Add(candidate);
                    }
                }
            }

            if (setLocalFilepath)
            {
                foreach (var kept in result.items)
                {
                    kept.filepath = kept.filepath.removeStartsWith(folder.path);
                }
            }

            if (removeEmptyDocuments)
            {
                // Materialize the matches first so the list is not modified while it is being enumerated.
                var blanks = result.items.Where(x => x.html.isNullOrEmpty()).ToList();
                foreach (var blank in blanks)
                {
                    result.items.Remove(blank);
                }
            }

            return result;
        }