/// <summary>
/// Downloads the page at <c>iteration.URL</c>, hands it to <c>Process</c> and builds one follow-up
/// iteration for every entry of <c>iteration.SubdirectoryList</c>, as long as the depth limit allows it.
/// </summary>
/// <param name="iteration">The iteration to load; its directory node receives the entries of <c>WebsiteList</c>.</param>
/// <param name="logger">Optional logger; not used by this method.</param>
/// <returns>The iterations created for the subdirectories; empty when the depth limit is reached.</returns>
public List <WebDirectoryIteration> Load(WebDirectoryIteration iteration, ILogBuilder logger = null)
        {
            HtmlWeb web = new HtmlWeb();

            // web.Load already returns a fresh document, so no placeholder instance is allocated first.
            HtmlDocument htmlDoc = web.Load(iteration.URL);

            Process(htmlDoc, iteration);

            List <WebDirectoryIteration> output = new List <WebDirectoryIteration>();

            // The depth check does not depend on the individual path, so it is evaluated once.
            if (iteration.DirectoryNode.level < DepthLimit)
            {
                foreach (String path in iteration.SubdirectoryList)
                {
                    WebDirectoryIteration newIteration = new WebDirectoryIteration("https://" + HomeDomain + path);

                    // String.Replace throws for a null or empty search value, which happens when the
                    // parent directory path is blank; in that case the path is used as it is.
                    String childName = path;
                    if (!String.IsNullOrEmpty(iteration.DirectoryPath))
                    {
                        childName = path.Replace(iteration.DirectoryPath, "");
                    }

                    WebDomainCategory subNode = iteration.DirectoryNode.CreateChildItem(childName.Trim('/')) as WebDomainCategory;
                    newIteration.DirectoryNode = subNode;
                    newIteration.DirectoryPath = path;
                    output.Add(newIteration);
                }
            }

            iteration.DirectoryNode.sites.AddRange(iteration.WebsiteList);

            return(output);
        }
        /// <summary>
        /// Crawls the directory starting at <paramref name="startingURL"/>, level by level, until no
        /// further iterations are produced by <c>Load</c>.
        /// </summary>
        /// <param name="startingURL">The URL of the first page; its directory path is taken from group 1 of the <c>SelectPath</c> match.</param>
        /// <param name="logger">Optional logger that receives a progress line after each level; may be <c>null</c>.</param>
        public void Start(String startingURL, ILogBuilder logger = null)
        {
            WebDomainCategory node = result;

            WebDirectoryIteration iteration = new WebDirectoryIteration();
            Match m = SelectPath.Match(startingURL);

            iteration.URL           = startingURL;
            // When the pattern does not match, the group value is an empty string.
            iteration.DirectoryPath = m.Groups[1].Value;

            node.name = iteration.DirectoryPath.Trim('/');

            iteration.DirectoryNode = node;

            List <WebDirectoryIteration> tasks = new List <WebDirectoryIteration>();

            tasks.Add(iteration);

            while (tasks.Count > 0)
            {
                List <WebDirectoryIteration> newTasks = new List <WebDirectoryIteration>();

                foreach (WebDirectoryIteration task in tasks)
                {
                    newTasks.AddRange(Load(task, logger));
                }

                // The logger is optional (defaults to null), so it must not be dereferenced blindly.
                if (logger != null)
                {
                    logger.log("Tasks done [" + tasks.Count + "] - new tasks [" + newTasks.Count + "]");
                }
                tasks = newTasks;
            }
        }
        /// <summary>
        /// Loads the domain lists found in the directory at <paramref name="path"/> and in all of its subdirectories.
        /// </summary>
        /// <remarks>
        /// A file matching <c>categorySiteList</c> located directly in the directory is loaded into this instance.
        /// Every matching file in the directory tree is then loaded into the category obtained by converting
        /// its directory path, relative to <paramref name="path"/>, into a graph path.
        /// NOTE(review): the recursive search also matches the top-level file, so the root list appears to be
        /// processed a second time through the loop - confirm whether that is intended.
        /// </remarks>
        /// <param name="path">The directory to load from.</param>
        /// <param name="options">The format options forwarded to <c>LoadDomainList</c>.</param>
        /// <param name="logger">Optional logger forwarded to <c>LoadDomainList</c>.</param>
        public void Load(String path, WebDomainCategoryFormatOptions options = WebDomainCategoryFormatOptions.saveReadmeFile | WebDomainCategoryFormatOptions.saveAggregate | WebDomainCategoryFormatOptions.normalizeDomainname, ILogBuilder logger = null)
        {
            DirectoryInfo di = new DirectoryInfo(path);

            FileInfo rootList = di.GetFiles(categorySiteList, SearchOption.TopDirectoryOnly).FirstOrDefault();

            if (rootList != null)
            {
                // Forward the logger here as well, consistent with the per-category calls below.
                LoadDomainList(rootList.FullName, options, logger);
            }

            // GetFiles already returns a materialized array, so no extra list copy is needed.
            FileInfo[] sampleFiles = di.GetFiles(categorySiteList, SearchOption.AllDirectories);

            foreach (FileInfo fi in sampleFiles)
            {
                // Directory of the file relative to the root directory being loaded.
                String pathForCategory = fi.DirectoryName.removeStartsWith(di.FullName);

                WebDomainCategory cat = graphTools.ConvertPathToGraph <WebDomainCategory>(this, pathForCategory, false, Path.DirectorySeparatorChar.ToString());
                cat.LoadDomainList(fi.FullName, options, logger);
            }
        }
        /// <summary>
        /// Creates a new <see cref="WebDomainCategory"/> named <paramref name="nameForChild"/>,
        /// adds it to this node and returns it.
        /// </summary>
        /// <param name="nameForChild">The name assigned to the new child.</param>
        /// <returns>The newly created child category.</returns>
        public override graphNodeCustom CreateChildItem(string nameForChild)
        {
            WebDomainCategory child = new WebDomainCategory { name = nameForChild };

            Add(child);
            return child;
        }
        /// <summary>
        /// Adds a category named <paramref name="_name"/> via <c>Add</c>, fills it with the given
        /// domains and sets its description.
        /// </summary>
        /// <param name="domainList">The domains appended to the category's sites; <c>null</c> is treated as no domains.</param>
        /// <param name="_name">The name passed to <c>Add</c>.</param>
        /// <param name="_description">The description assigned to the category.</param>
        /// <param name="logger">Logger; not used by this method.</param>
        /// <returns>The category returned by <c>Add</c>, cast to <see cref="WebDomainCategory"/>.</returns>
        public WebDomainCategory AddCategory(List <String> domainList, String _name, String _description, ILogBuilder logger)
        {
            WebDomainCategory output = Add(_name) as WebDomainCategory;

            // A missing list means there is nothing to append; the category is still created.
            if (domainList != null)
            {
                output.sites.AddRange(domainList);
            }
            output.description = _description;
            return(output);
        }
// Example no. 6
// 0
        /// <summary>
        /// Builds the domain category that corresponds to this documents category and, recursively,
        /// to all of its child categories.
        /// </summary>
        /// <param name="parent">
        /// The category to fill; when <c>null</c>, a new one named after this instance is created.
        /// </param>
        /// <returns>The filled category: <paramref name="parent"/> or the newly created one.</returns>
        public WebDomainCategory GetDomainCategory(WebDomainCategory parent = null)
        {
            WebDomainCategory target = parent ?? new WebDomainCategory(name);

            // Collect the domain of every site document held directly by this category.
            var domains = from document in siteDocuments
                          select document.domain;
            target.sites.AddRange(domains);

            // Mirror each child category as a child node and let it fill itself.
            foreach (WebDocumentsCategory child in this)
            {
                WebDomainCategory mirrored = target.CreateChildItem(child.name) as WebDomainCategory;
                child.GetDomainCategory(mirrored);
            }

            return target;
        }