/// <summary>
/// Downloads the page at <c>iteration.URL</c>, hands it to <c>Process</c>, and builds the follow-up
/// iterations for the subdirectories listed on the iteration.
/// </summary>
/// <param name="iteration">Iteration to load; its URL, directory node and directory path are read here.</param>
/// <param name="logger">Optional logger; not used by this method.</param>
/// <returns>
/// One new iteration per entry of <c>iteration.SubdirectoryList</c>, or an empty list when the level of
/// the iteration's directory node is not below <c>DepthLimit</c>.
/// </returns>
public List<WebDirectoryIteration> Load(WebDirectoryIteration iteration, ILogBuilder logger = null)
{
    HtmlWeb web = new HtmlWeb();

    // web.Load already returns a fresh document, so no placeholder instance is allocated first.
    HtmlDocument htmlDoc = web.Load(iteration.URL);

    // NOTE(review): presumably Process fills iteration.SubdirectoryList and iteration.WebsiteList - confirm.
    Process(htmlDoc, iteration);

    List<WebDirectoryIteration> output = new List<WebDirectoryIteration>();

    // The depth check does not depend on the individual path, so it is evaluated once for the whole batch.
    if (iteration.DirectoryNode.level < DepthLimit)
    {
        foreach (String path in iteration.SubdirectoryList)
        {
            WebDirectoryIteration newIteration = new WebDirectoryIteration("https://" + HomeDomain + path);

            // The child node is named after the part of the path that lies below the current directory.
            String childName = path.Replace(iteration.DirectoryPath, "").Trim('/');
            WebDomainCategory subNode = iteration.DirectoryNode.CreateChildItem(childName) as WebDomainCategory;

            newIteration.DirectoryNode = subNode;
            newIteration.DirectoryPath = path;
            output.Add(newIteration);
        }
    }

    iteration.DirectoryNode.sites.AddRange(iteration.WebsiteList);
    return output;
}
/// <summary>
/// Crawls the directory starting at <paramref name="startingURL"/> batch by batch: every iteration of the
/// current batch is passed to <c>Load</c>, and the iterations returned form the next batch. The crawl ends
/// when a batch produces no new iterations.
/// </summary>
/// <param name="startingURL">URL of the first page; <c>SelectPath</c> is matched against it to get the directory path.</param>
/// <param name="logger">Optional logger that receives one progress line per batch; may be <c>null</c>.</param>
public void Start(String startingURL, ILogBuilder logger = null)
{
    WebDomainCategory node = result;
    WebDirectoryIteration iteration = new WebDirectoryIteration();

    // First capture group of SelectPath is used as the directory path of the starting page.
    Match m = SelectPath.Match(startingURL);
    iteration.URL = startingURL;
    iteration.DirectoryPath = m.Groups[1].Value;

    node.name = iteration.DirectoryPath.Trim('/');
    iteration.DirectoryNode = node;

    List<WebDirectoryIteration> tasks = new List<WebDirectoryIteration>();
    tasks.Add(iteration);

    while (tasks.Any())
    {
        List<WebDirectoryIteration> newTasks = new List<WebDirectoryIteration>();
        foreach (WebDirectoryIteration task in tasks)
        {
            newTasks.AddRange(Load(task, logger));
        }

        // logger is optional (defaults to null), so it must be checked before use.
        if (logger != null)
        {
            logger.log("Tasks done [" + tasks.Count + "] - new tasks [" + newTasks.Count + "]");
        }

        tasks = newTasks;
    }
}
/// <summary>
/// Loads domain lists from the directory at <paramref name="path"/> and from all of its subdirectories.
/// </summary>
/// <param name="path">Directory that is searched for files matching <c>categorySiteList</c>.</param>
/// <param name="options">Options forwarded to <c>LoadDomainList</c>.</param>
/// <param name="logger">Optional logger forwarded to <c>LoadDomainList</c>.</param>
public void Load(String path, WebDomainCategoryFormatOptions options = WebDomainCategoryFormatOptions.saveReadmeFile | WebDomainCategoryFormatOptions.saveAggregate | WebDomainCategoryFormatOptions.normalizeDomainname, ILogBuilder logger = null)
{
    DirectoryInfo di = new DirectoryInfo(path);

    // A list file directly inside the root directory is loaded into this node.
    FileInfo rootList = di.GetFiles(categorySiteList, SearchOption.TopDirectoryOnly).FirstOrDefault();
    if (rootList != null)
    {
        // Forward the logger here as well, matching the per-category call below.
        LoadDomainList(rootList.FullName, options, logger);
    }

    // NOTE(review): the AllDirectories search also returns the top-level file handled above; whether it
    // ends up loaded twice depends on what ConvertPathToGraph returns for an empty path - confirm.
    foreach (FileInfo fi in di.GetFiles(categorySiteList, SearchOption.AllDirectories))
    {
        // Path of the file's directory relative to the root; it is converted into a category node.
        String pathForCategory = fi.DirectoryName.removeStartsWith(di.FullName);
        WebDomainCategory cat = graphTools.ConvertPathToGraph<WebDomainCategory>(this, pathForCategory, false, Path.DirectorySeparatorChar.ToString());
        cat.LoadDomainList(fi.FullName, options, logger);
    }
}
/// <summary>
/// Creates a new <see cref="WebDomainCategory"/> named <paramref name="nameForChild"/> and passes it to <c>Add</c>.
/// </summary>
/// <param name="nameForChild">Name assigned to the new child.</param>
/// <returns>The newly created child.</returns>
public override graphNodeCustom CreateChildItem(string nameForChild)
{
    WebDomainCategory child = new WebDomainCategory { name = nameForChild };
    Add(child);
    return child;
}
/// <summary>
/// Adds a category named <paramref name="_name"/> via <c>Add</c>, then fills in its sites and description.
/// </summary>
/// <param name="domainList">Domains appended to the category's <c>sites</c>.</param>
/// <param name="_name">Name passed to <c>Add</c>.</param>
/// <param name="_description">Text stored in the category's <c>description</c>.</param>
/// <param name="logger">Logger; not used by this method.</param>
/// <returns>The category obtained from <c>Add</c>.</returns>
public WebDomainCategory AddCategory(List<String> domainList, String _name, String _description, ILogBuilder logger)
{
    WebDomainCategory category = Add(_name) as WebDomainCategory;

    // Sites are appended before the description is set; the order is kept as-is.
    category.sites.AddRange(domainList);
    category.description = _description;

    return category;
}
/// <summary>
/// Builds a <see cref="WebDomainCategory"/> tree from this node: the domains of <c>siteDocuments</c> go into
/// the target category and every child category is converted recursively into a child of it.
/// </summary>
/// <param name="parent">Category to fill; when <c>null</c>, a new one named after this node is created.</param>
/// <returns>The category that was filled: <paramref name="parent"/>, or the newly created one.</returns>
public WebDomainCategory GetDomainCategory(WebDomainCategory parent = null)
{
    WebDomainCategory target = parent ?? new WebDomainCategory(name);

    target.sites.AddRange(siteDocuments.Select(document => document.domain));

    foreach (WebDocumentsCategory child in this)
    {
        // Each child category gets its own node under the target and fills it recursively.
        child.GetDomainCategory(target.CreateChildItem(child.name) as WebDomainCategory);
    }

    return target;
}