/// <summary>
/// Writes the source of every document in <paramref name="site"/> to a file inside
/// <paramref name="folder"/>. A file that already exists at the resolved path is left untouched.
/// </summary>
/// <param name="site">The site whose <c>documents</c> are saved; its <c>domain</c> is used to build the file name.</param>
/// <param name="folder">The folder that resolves the target path for each page file.</param>
public void SaveWebSite(WebSiteDocuments site, folderNode folder)
{
    foreach (WebSiteDocument page in site.documents)
    {
        // Join domain and page path, collapse doubled slashes, then prefix the scheme.
        String url = site.domain.add(page.path, "/");
        url = "http://" + url.Replace("//", "/");

        // Turn the URL into a file name and sanitize it.
        String safeName = WebSiteDocumentsSetTools.GetSafeFilename(GetFilenameFromURLPath(url));

        String description = "Page of [" + site.domain + "] at path [" + page.path + "]";
        String targetPath = folder.pathFor(safeName, imbSCI.Data.enums.getWritableFileMode.existing, description, false);

        // The source is obtained for every page, before the existence check.
        String content = GetWebDocumentSource(page);

        if (File.Exists(targetPath))
        {
            continue;
        }

        File.WriteAllText(targetPath, content);
    }
}
/// <summary>
/// Loads the web sites stored in a directory: groups the directory's files by the domain name
/// extracted from each file name, loads every file as a document, and adds one
/// <c>WebSiteDocuments</c> instance per domain to <paramref name="category"/>.
/// Documents whose assigned ID was already seen within the same site are skipped.
/// </summary>
/// <param name="category">The category whose <c>siteDocuments</c> receives the loaded sites.</param>
/// <param name="di">The directory whose files are scanned.</param>
/// <param name="options">Format options passed through to <c>LoadWebSiteDocument</c>.</param>
/// <param name="logger">Optional logger; nothing is logged when it is null.</param>
private void LoadWebSites(WebDocumentsCategory category, DirectoryInfo di, WebDomainCategoryFormatOptions options, ILogBuilder logger = null)
{
    FileInfo[] fileList = di.GetFiles();

    // NOTE(review): a directory holding exactly one file is skipped, same as an empty one —
    // threshold kept as in the original; confirm that this is intended.
    if (fileList.Length <= 1)
    {
        return;
    }

    // Pass 1: index the files by the domain name found in their decoded URL path.
    Dictionary<String, List<FileInfo>> siteFilesIndex = new Dictionary<String, List<FileInfo>>();

    foreach (FileInfo fi in fileList)
    {
        String path = GetURLPathFromFilename(fi.Name);

        // Ordinal comparison: this is a protocol prefix, not linguistic text.
        if (!path.StartsWith("http", StringComparison.Ordinal))
        {
            continue;
        }

        Match m = SelectDomainName.Match(path);
        if (!m.Success)
        {
            continue;
        }

        String domain = m.Groups[1].Value;

        List<FileInfo> siteFiles;
        if (!siteFilesIndex.TryGetValue(domain, out siteFiles))
        {
            siteFiles = new List<FileInfo>();
            siteFilesIndex.Add(domain, siteFiles);
        }

        siteFiles.Add(fi);
    }

    if (logger != null)
    {
        logger.log("Web sites detected: [" + siteFilesIndex.Count + "]");
    }

    // Pass 2: build one site per domain and load its documents.
    foreach (KeyValuePair<String, List<FileInfo>> entry in siteFilesIndex)
    {
        WebSiteDocuments webSite = new WebSiteDocuments(entry.Key);

        // IDs already assigned within this site; used to drop duplicate pages.
        HashSet<String> seenIds = new HashSet<String>();

        foreach (FileInfo fi in entry.Value)
        {
            WebSiteDocument d = LoadWebSiteDocument(fi, webSite, options);

            d.AssignedID = WebSiteDocumentsSetTools.GetPageURL(d, webSite);

            // Add returns false when the ID is already present, so only the first
            // document with a given ID is kept.
            if (seenIds.Add(d.AssignedID))
            {
                webSite.documents.Add(d);
            }
        }

        category.siteDocuments.Add(webSite);

        if (logger != null)
        {
            logger.log(category.path + " -> [" + webSite.domain + "] -> pages [" + webSite.documents.Count + "]");
        }
    }
}