/// <summary>
 /// Saves every <c>WebDocumentsCategory</c> enumerated from <paramref name="category"/>
 /// by calling <c>SaveWebSites</c> for it with <paramref name="rootFolder"/> as the target folder.
 /// </summary>
 /// <param name="category">The category whose enumerated child categories are saved.</param>
 /// <param name="rootFolder">The folder node handed to <c>SaveWebSites</c> for each child category.</param>
 /// <param name="options">Format options, forwarded unchanged.</param>
 /// <param name="logger">Optional logger, forwarded to <c>SaveWebSites</c>; may be <c>null</c>.</param>
 protected void SaveSubcategories(WebDocumentsCategory category, folderNode rootFolder, WebDomainCategoryFormatOptions options, ILogBuilder logger = null)
 {
     foreach (WebDocumentsCategory subcat in category)
     {
         // Forward the logger so it is not silently dropped while descending the category tree.
         SaveWebSites(subcat, rootFolder, options, logger);
     }
 }
        /// <summary>
        /// Builds a <c>WebDocumentsCategory</c> tree from the directory at <paramref name="path"/>.
        /// </summary>
        /// <param name="path">Path of the dataset directory; rejected when null or empty.</param>
        /// <param name="options">Format options passed on to the directory loader.</param>
        /// <param name="logger">Optional logger; receives a message when the directory does not exist.</param>
        /// <returns>
        /// The root category, named after the directory. When the directory does not exist,
        /// the category is returned without any directory content loaded into it.
        /// </returns>
        /// <exception cref="ArgumentException">Thrown when <paramref name="path"/> is null or empty.</exception>
        public WebDocumentsCategory LoadDataset(String path, WebDomainCategoryFormatOptions options, ILogBuilder logger = null)
        {
            WebDocumentsCategory root = new WebDocumentsCategory();

            if (path.isNullOrEmpty())
            {
                throw new ArgumentException("Path is empty or null", nameof(path));
            }

            DirectoryInfo directory = new DirectoryInfo(path);
            root.name = directory.Name;

            if (directory.Exists)
            {
                LoadDirectory(root, directory, options, logger);
            }
            else
            {
                // Missing directory is reported (if a logger is given) and the bare root is returned.
                logger?.log("Directory " + path + " not found!");
            }

            return root;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Creates a new <c>WebDocumentsCategory</c> named <paramref name="nameForChild"/>,
        /// passes it to <c>Add</c> and returns it.
        /// </summary>
        /// <param name="nameForChild">Name assigned to the new category.</param>
        /// <returns>The newly created category.</returns>
        public override graphNodeCustom CreateChildItem(string nameForChild)
        {
            WebDocumentsCategory child = new WebDocumentsCategory { name = nameForChild };
            Add(child);
            return child;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Resolves <paramref name="__path"/> to a category through <c>graphTools.ConvertPathToGraph</c>,
        /// starting from this node and using <c>pathSeparator</c> as the separator.
        /// </summary>
        /// <param name="__path">Category path to resolve.</param>
        /// <param name="isAbsolute">
        /// Forwarded to the path converter. NOTE(review): presumably marks the path as absolute
        /// rather than relative to this node - confirm against <c>graphTools</c>.
        /// </param>
        /// <returns>The category returned by the path converter.</returns>
        public WebDocumentsCategory GetOrAdd(String __path, Boolean isAbsolute)
        {
            // The trailing 'true' is passed through unchanged; presumably it tells the converter
            // to create missing nodes (the method name suggests so) - TODO confirm.
            return graphTools.ConvertPathToGraph<WebDocumentsCategory>(this, __path, isAbsolute, pathSeparator, true);
        }
 /// <summary>
 /// Recursively walks the sub-directories of <paramref name="di"/>: for each one a child category
 /// named after the directory is created under <paramref name="category"/>, its web sites are
 /// loaded with <c>LoadWebSites</c>, and the walk continues inside it.
 /// Files located directly in <paramref name="di"/> are not read by this method.
 /// </summary>
 /// <param name="category">The category that receives one child per sub-directory.</param>
 /// <param name="di">The directory whose sub-directories are processed.</param>
 /// <param name="options">Format options, forwarded unchanged.</param>
 /// <param name="logger">Optional logger, forwarded unchanged; may be <c>null</c>.</param>
 /// <exception cref="InvalidCastException">
 /// Thrown when <c>CreateChildItem</c> returns a node that is not a <c>WebDocumentsCategory</c>.
 /// </exception>
 private void LoadDirectory(WebDocumentsCategory category, DirectoryInfo di, WebDomainCategoryFormatOptions options, ILogBuilder logger = null)
 {
     foreach (DirectoryInfo dir in di.GetDirectories())
     {
         // Direct cast instead of 'as': a node of the wrong type fails right here instead of
         // travelling on as null and surfacing later as a NullReferenceException.
         WebDocumentsCategory child = (WebDocumentsCategory)category.CreateChildItem(dir.Name);

         LoadWebSites(child, dir, options, logger);
         LoadDirectory(child, dir, options, logger);
     }
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Puts the web sites of each given set into the category obtained from <c>GetOrAdd</c>
        /// for the set's name. When the target category already holds a site with the same domain,
        /// the first such entry is removed before the incoming site is added.
        /// </summary>
        /// <param name="categorySet">Sets of web sites; each set's <c>name</c> is used as the category path.</param>
        public void SetCategoryByDataset(IEnumerable <WebSiteDocumentsSet> categorySet)
        {
            foreach (WebSiteDocumentsSet siteSet in categorySet)
            {
                // Path is passed as non-absolute (second argument 'false').
                WebDocumentsCategory target = GetOrAdd(siteSet.name, false);

                foreach (WebSiteDocuments incoming in siteSet)
                {
                    var previous = target.siteDocuments.FirstOrDefault(known => known.domain == incoming.domain);
                    if (previous != null)
                    {
                        target.siteDocuments.Remove(previous);
                    }

                    target.siteDocuments.Add(incoming);
                }
            }
        }
        /// <summary>
        /// Saves <paramref name="dataset"/> into the directory at <paramref name="path"/>:
        /// the category tree is written with <c>SaveWebSites</c>, and, depending on
        /// <paramref name="options"/>, graph files and readme files are generated in the root folder.
        /// </summary>
        /// <param name="dataset">The root category to save; must not be <c>null</c>.</param>
        /// <param name="path">Path of the output directory.</param>
        /// <param name="options">
        /// Format options; <c>saveGraphAtRoot</c> and <c>saveReadmeFile</c> are evaluated here,
        /// the value is also forwarded to <c>SaveWebSites</c>.
        /// </param>
        /// <param name="logger">Optional logger, forwarded to <c>SaveWebSites</c>; may be <c>null</c>.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="dataset"/> is <c>null</c>.</exception>
        public void SaveDataset(WebDocumentsCategory dataset, String path, WebDomainCategoryFormatOptions options, ILogBuilder logger = null)
        {
            if (dataset == null)
            {
                throw new ArgumentNullException(nameof(dataset));
            }

            // DirectoryInfo is converted to folderNode by the assignment itself.
            folderNode folder = new DirectoryInfo(path);

            // Folder description = dataset description combined with 'description'
            // (a member defined outside this block), separated by a new line.
            folder.description = dataset.description.add(description, Environment.NewLine);

            // Forward the logger instead of dropping it.
            SaveWebSites(dataset, folder, options, logger);

            if (options.HasFlag(WebDomainCategoryFormatOptions.saveGraphAtRoot))
            {
                // The value 300 is handed to the converter as-is; its meaning is not visible here.
                var dmgl = GraphConverters.documentsConverter.Convert(dataset, 300);
                dmgl.Save(folder.pathFor("dataset", imbSCI.Data.enums.getWritableFileMode.overwrite, "Directed graph of categories in the dataset", true));

                // NOTE(review): the DOT file is requested with mode 'existing' while the graph above
                // uses 'overwrite' - confirm the difference is intended.
                var dot = imbSCI.Graph.Converters.GraphConversionTools.ConvertToDOT(dmgl);
                dot.Save(folder.pathFor("dataset_dot", imbSCI.Data.enums.getWritableFileMode.existing, "DOT graph of categories in the dataset", true));
            }

            if (options.HasFlag(WebDomainCategoryFormatOptions.saveReadmeFile))
            {
                folder.generateReadmeFiles(imbACE.Core.appManager.AppInfo);
            }
        }
        /// <summary>
        /// Saves one category: adds a folder named after the category under
        /// <paramref name="rootFolder"/>, saves every web site of the category into it with
        /// <c>SaveWebSite</c>, optionally writes the list of domains, and then saves the
        /// subcategories into the same folder.
        /// </summary>
        /// <param name="category">The category to save.</param>
        /// <param name="rootFolder">The parent folder node under which the category folder is added.</param>
        /// <param name="options">
        /// Format options; when <c>saveDomainList</c> is set, a file with one domain per line is written.
        /// </param>
        /// <param name="logger">Optional logger, forwarded to <c>SaveSubcategories</c>; may be <c>null</c>.</param>
        protected void SaveWebSites(WebDocumentsCategory category, folderNode rootFolder, WebDomainCategoryFormatOptions options, ILogBuilder logger = null)
        {
            folderNode folder = rootFolder.Add(category.name, category.name, category.description);
            StringBuilder domainList = new StringBuilder();

            foreach (WebSiteDocuments site in category.siteDocuments)
            {
                domainList.AppendLine(site.domain);
                SaveWebSite(site, folder);
            }

            if (options.HasFlag(WebDomainCategoryFormatOptions.saveDomainList))
            {
                String listPath = folder.pathFor(WebDomainCategory.categorySiteList, imbSCI.Data.enums.getWritableFileMode.overwrite, "Domains in category [" + category.path + "]", true);
                File.WriteAllText(listPath, domainList.ToString());
            }

            // Forward the logger so the recursive save keeps it all the way down the tree.
            SaveSubcategories(category, folder, options, logger);
        }
        /// <summary>
        /// Loads the web sites stored as files in <paramref name="di"/> into
        /// <paramref name="category"/>. File names are converted to URL paths, grouped by the
        /// domain captured by <c>SelectDomainName</c>, and each group becomes one
        /// <c>WebSiteDocuments</c> whose pages are loaded with <c>LoadWebSiteDocument</c>.
        /// Pages whose assigned ID was already seen for the same site are skipped.
        /// </summary>
        /// <param name="category">The category that receives the loaded web sites.</param>
        /// <param name="di">The directory whose files are inspected (sub-directories are not visited here).</param>
        /// <param name="options">Format options, forwarded to <c>LoadWebSiteDocument</c>.</param>
        /// <param name="logger">Optional logger for progress messages; may be <c>null</c>.</param>
        private void LoadWebSites(WebDocumentsCategory category, DirectoryInfo di, WebDomainCategoryFormatOptions options, ILogBuilder logger = null)
        {
            FileInfo[] fileList = di.GetFiles();

            // NOTE(review): directories holding zero or exactly one file are skipped entirely.
            // This keeps the original condition; confirm that a single-file directory should be ignored.
            if (fileList.Length <= 1)
            {
                return;
            }

            Dictionary<String, List<FileInfo>> siteFilesIndex = new Dictionary<String, List<FileInfo>>();

            foreach (FileInfo fi in fileList)
            {
                String path = GetURLPathFromFilename(fi.Name);

                // Ordinal comparison: this is a protocol prefix check, not linguistic text.
                if (!path.StartsWith("http", StringComparison.Ordinal))
                {
                    continue;
                }

                Match m = SelectDomainName.Match(path);
                if (!m.Success)
                {
                    continue;
                }

                String domain = m.Groups[1].Value;

                // Single lookup instead of ContainsKey followed by the indexer.
                List<FileInfo> siteFiles;
                if (!siteFilesIndex.TryGetValue(domain, out siteFiles))
                {
                    siteFiles = new List<FileInfo>();
                    siteFilesIndex.Add(domain, siteFiles);
                }

                siteFiles.Add(fi);
            }

            logger?.log("Web sites detected: [" + siteFilesIndex.Count + "]");

            foreach (KeyValuePair<String, List<FileInfo>> entry in siteFilesIndex)
            {
                WebSiteDocuments webSite = new WebSiteDocuments(entry.Key);

                // Set-based duplicate detection; avoids a linear scan for every page.
                HashSet<String> seenIds = new HashSet<String>();

                foreach (FileInfo fi in entry.Value)
                {
                    WebSiteDocument d = LoadWebSiteDocument(fi, webSite, options);
                    d.AssignedID = WebSiteDocumentsSetTools.GetPageURL(d, webSite);

                    // Add returns false when the ID is already present: keep only the first page per ID.
                    if (seenIds.Add(d.AssignedID))
                    {
                        webSite.documents.Add(d);
                    }
                }

                category.siteDocuments.Add(webSite);

                logger?.log(category.path + " -> [" + webSite.domain + "] -> pages [" + webSite.documents.Count + "]");
            }
        }