/// <summary>
        /// Runs every public, parameterless instance method that is declared directly on the concrete test class.
        /// </summary>
        /// <remarks>
        /// For each discovered method a sub-folder named after the method is added under the current results folder
        /// and used as <c>folderResults</c> while the method runs; the previous folder is restored afterwards.
        /// Exceptions thrown by the invocation are logged and do not stop the remaining methods.
        /// </remarks>
        /// <param name="resultsNode">Folder that replaces the current results folder; ignored when <c>null</c>.</param>
        public void ExecuteTest(folderNode resultsNode = null)
        {
            const System.Reflection.BindingFlags declaredPublicInstance =
                System.Reflection.BindingFlags.Public
                | System.Reflection.BindingFlags.DeclaredOnly
                | System.Reflection.BindingFlags.Instance;

            string testName = GetType().Name;

            imbSCI.Core.screenOutputControl.logToConsoleControl.setAsOutput(log, testName);

            if (resultsNode != null)
            {
                folderResults = resultsNode;
            }

            log.log("-- Starting test [" + testName + "]");

            foreach (MethodInfo method in GetType().GetMethods(declaredPublicInstance))
            {
                // The per-method folder is created even for methods that are skipped below.
                folderNode parentResults = folderResults;
                folderResults = parentResults.Add(method.Name, method.Name.imbTitleCamelOperation(true), "Results of test method [" + method.Name + "]");

                bool isParameterless = method.GetParameters().Length == 0;
                if (isParameterless)
                {
                    log.log("-- Starting test method [" + method.Name + "]");
                    try
                    {
                        method.Invoke(this, null);
                    }
                    catch (Exception ex)
                    {
                        log.log("-- : " + ex.LogException("Test failed[" + method.Name + "]", testName + " -"));
                    }
                }

                // Restore the folder that was current before this method.
                folderResults = parentResults;
            }

            Done();
        }
Esempio n. 2
0
        /// <summary>
        /// Writes a report sub-folder for one crawled target: the page text, the URLs of its outflow links
        /// and, when an evaluation exists, the evaluation serialized to <c>relevance.xml</c>.
        /// </summary>
        /// <param name="t">Crawled target to report on.</param>
        /// <param name="fn">Folder under which the page sub-folder is created.</param>
        /// <param name="c">Ordinal of the page; zero-padded to three digits in the sub-folder name.</param>
        private void reportTarget(spiderTarget t, folderNode fn, int c)
        {
            string     pageFolder = "P" + c.ToString("D3") + "_" + t.IsRelevant.ToString();
            folderNode pfn        = fn.Add(pageFolder, "Page " + c.ToString(), "Report on page " + t.url + " crawled by " + name + ". Target.IsRelevant: " + t.IsRelevant + ".".addLine(pageDescription));

            fileunit content = new fileunit(pfn.pathFor("content.txt"), false);
            fileunit links   = new fileunit(pfn.pathFor("links.txt"), false);

            if (t.evaluation != null)
            {
                t.evaluation.saveObjectToXML(pfn.pathFor("relevance.xml"));
            }

            content.setContent(t.pageText);

            // Links are only available when the target has a loaded page.
            if (t.page != null)
            {
                foreach (spiderLink ln in t.page.relationship.outflowLinks.items.Values)
                {
                    links.Append(ln.url);
                }

                //t.page.webpage.links.ForEach(x => links.Append(x.nature + " | " + x.name + " | " + x.url));
            }

            content.Save();
            links.Save();
        }
Esempio n. 3
0
        //public Dictionary<HtmlNode, DocumentCluster> GetClusterByDocumentDictionary()
        //{
        //    Dictionary<HtmlNode, DocumentCluster> output = new Dictionary<HtmlNode, DocumentCluster>();

        //    foreach (DocumentCluster cluster in this.)
        //    {
        //        var nodes = cluster.items.Select(x => x);

        //        foreach (HtmlNode node in nodes)
        //        {
        //            context.DeclarationConstruction_ClusterAnalysisContext.ClusterByDocuments.Add(node, cluster);
        //        }

        //        if (cluster.ClusterSeed != null)
        //        {
        //            context.DeclarationConstruction_ClusterAnalysisContext.ClusterByDocuments.Add(cluster.ClusterSeed, cluster);
        //        }
        //    }
        //}

        /// <summary>
        /// Publishes every <c>DocumentCluster</c> of this collection and writes a summary <c>report.txt</c>.
        /// </summary>
        /// <remarks>
        /// When the collection has a non-empty <c>name</c>, the clusters and the summary go into a sub-folder with that name.
        /// </remarks>
        /// <param name="documentNodeDictionary">Source information for each document node; passed through to each cluster.</param>
        /// <param name="folderWithResults">Folder that receives the reports.</param>
        /// <param name="result">Similarity result that supplies the document labels.</param>
        public void Publish(Dictionary <HtmlNode, HtmlSourceAndUrl> documentNodeDictionary, folderNode folderWithResults, DocumentSimilarityResult result)
        {
            folderWithResults.generateReadmeFiles(null);

            var clusters = GetClusters <DocumentCluster>(true);
            Dictionary <HtmlNode, string> labels = result.GetLabelsByDocument();

            folderNode targetFolder = folderWithResults;
            if (!name.isNullOrEmpty())
            {
                targetFolder = folderWithResults.Add(name, name, "Reports for cluster collection " + name);
            }

            builderForText summary = new builderForText();

            foreach (DocumentCluster current in clusters)
            {
                current.Publish(labels, documentNodeDictionary, targetFolder, result);

                // Two summary lines per cluster: its size and its range.
                summary.AppendPair(current.name, current.items.Count);
                summary.AppendPair("- range", current.range.Range);
            }

            String summaryPath = targetFolder.pathFor("report.txt", imbSCI.Data.enums.getWritableFileMode.overwrite);
            File.WriteAllText(summaryPath, summary.GetContent());
        }
Esempio n. 4
0
        /// <summary>
        /// Resolves <c>_folder</c> for this state from the parent's folder: a sub-folder named after this state
        /// when <c>HasSubfolder</c> is set, otherwise the parent's folder itself.
        /// </summary>
        /// <param name="i">Retry counter used by the recursive self-call; external callers leave it at the default.</param>
        /// <exception cref="ArgumentException">The parent, or the parent's folder, is not set.</exception>
        /// <exception cref="Exception">The resolved folder path does not start with the parent's folder path after a retry.</exception>
        private void SetFolder(Int32 i = 0)
        {
            // ArgumentException takes (message, paramName); the original passed them the other way round.
            if (parent == null)
            {
                throw new ArgumentException("Parent state or Project must be set with construction of this state object!", nameof(parent));
            }

            folderNode parentFolder = parent.folder;

            if (parentFolder == null)
            {
                throw new ArgumentException("Parent state or Project must be set with construction of this state object!", nameof(Project));
            }

            if (_folder == null)
            {
                if (HasSubfolder)
                {
                    _folder = parentFolder.Add(name, name, "Project state [" + Info.displayName + "] data." + Info.description);
                }
                else
                {
                    _folder = parentFolder;
                }
            }

            if (_folder != null && !_folder.path.StartsWith(parentFolder.path))
            {
                if (i > 0)
                {
                    throw new Exception("Folder [" + _folder.path + "] is not child of [" + parentFolder.path + "] -- after [" + i.ToString() + "] iterations of autosetup");
                }

                // NOTE(review): nothing in this method changes _folder before the retry, so unless
                // parent.folder changes between calls the second pass ends in the exception above — confirm intent.
                SetFolder(i + 1);
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Saves every item of the collection into <paramref name="folder"/>, then saves each sub-collection
        /// into a sub-folder named after it.
        /// </summary>
        /// <param name="sources">Collection to save.</param>
        /// <param name="folder">Target folder.</param>
        /// <param name="filename">
        /// Name passed to each item's <c>Save</c>; when null or empty, the item's zero-based ordinal is passed instead.
        /// </param>
        /// <param name="deleteExisting">
        /// When <c>true</c>, <c>deleteFiles()</c> is called on the target folder before saving. The flag is now
        /// forwarded to sub-collections as well, so passing <c>false</c> no longer wipes sub-folders.
        /// </param>
        public static void Save(this HtmlSourceAndUrlCollection sources, folderNode folder, String filename = "htmlsource", Boolean deleteExisting = true)
        {
            if (deleteExisting)
            {
                folder.deleteFiles();
            }

            if (filename.isNullOrEmpty())
            {
                Int32 c = 0;
                foreach (var s in sources.items)
                {
                    s.Save(folder, c.ToString());
                    c++;
                }
            }
            else
            {
                foreach (var s in sources.items)
                {
                    s.Save(folder, filename);
                }
            }

            foreach (var sb in sources.SubCollections)
            {
                var f = folder.Add(sb.name, sb.name, "HTML sources subcollection of " + sources.name + ".");

                // Forward deleteExisting: the original relied on the default (true) here, which
                // deleted files in sub-folders even when the caller asked to keep existing files.
                sb.Save(f, filename, deleteExisting);
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Sets the storage root and creates the record providers on top of it.
        /// </summary>
        /// <remarks>
        /// Finance, InternationalTrade, BankAccounts and Persons each get a sub-folder named after the property;
        /// Companies uses the storage root directly and is switched to single-collection mode.
        /// </remarks>
        /// <param name="_rootFolder">Root folder of the record storage.</param>
        /// <param name="logger">Not used by this method.</param>
        public void Init(folderNode _rootFolder, ITextRender logger)
        {
            const String storageDescription = "Records storage";

            storageFolder = _rootFolder;

            var financeFolder = storageFolder.Add(nameof(Finance), nameof(Finance), storageDescription);
            Finance = new FileSystemRecordProvider <FinanceOverviewRecords, FinanceOverview>(financeFolder, "fin_", "fin_");

            var tradeFolder = storageFolder.Add(nameof(InternationalTrade), nameof(InternationalTrade), storageDescription);
            InternationalTrade = new FileSystemRecordProvider <InternationalTradeRecords, InternationalTradeByCountry>(tradeFolder, "int_", "int_");

            var bankFolder = storageFolder.Add(nameof(BankAccounts), nameof(BankAccounts), storageDescription);
            BankAccounts = new FileSystemRecordProvider <CompanyBankAccountCollection, bankAccount>(bankFolder, "ban_", "ban_");

            var personsFolder = storageFolder.Add(nameof(Persons), nameof(Persons), storageDescription);
            Persons = new FileSystemRecordProvider <CompanyPersonCollection, Person>(personsFolder, "per_", "per_");

            // Companies live directly in the storage root.
            Companies = new FileSystemRecordProvider <CompanyInformationCollection, CompanyInformation>(storageFolder, "acc_", "acc_");
            Companies.OperationMode = RecordProviderOperationMode.singleCollectionMode;
        }
        /// <summary>
        /// Stores the folder and name, creates the structure model for that name and adds the "include" sub-folder.
        /// </summary>
        /// <param name="_folder">Folder assigned to <c>folder</c>.</param>
        /// <param name="_name">Name assigned to <c>name</c> and used for the structure model.</param>
        protected void deployFolders(folderNode _folder, String _name)
        {
            this.folder = _folder;
            this.name   = _name;

            // The structure model doubles as the current node.
            this.StructureModel = new reportStructureModel(this.name);
            this.node           = this.StructureModel;

            this.folder_include = this.folder.Add("include", "include", "globally included resources");
        }
Esempio n. 8
0
        /// <summary>
        /// Builds a new <c>PlanesMethodContext</c> with the given name and its own sub-folder.
        /// </summary>
        /// <param name="name">Name of the context; also used as name and caption of the sub-folder.</param>
        /// <param name="folder">Root folder for the experiment; the context folder is added beneath it.</param>
        /// <returns>The newly created context.</returns>
        public PlanesMethodContext CreateContext(string name, folderNode folder)
        {
            return new PlanesMethodContext
            {
                name   = name,
                folder = folder.Add(name, name, "Home folder of the experiment")
            };
        }
Esempio n. 9
0
        /// <summary>
        /// Writes descriptive bigram lines (overlap and ordinal mode) for a fixed word list to "ngrams.txt",
        /// then runs the word similarity component over the same list with three similarity equations and
        /// writes each result to its own text file.
        /// </summary>
        public void TestNGramsAndSimilarity()
        {
            folderNode folder = new folderNode().Add("NLP\\WordAnalysis", "Word analysis", "Folder with results of word analysis tests");

            String[] words = new String[] { "ormar", "orman", "rashladni", "konstrukcija", "elektroinstalacija", "elektromotor", "motorno", "građevina", "građevinski", "metalni", "metalno", "metal", "aluminijum", "aluminijumski", "zgrada", "kotao", "kotlovski", "kotlarnica", "peć", "dimnjak", "cevovodi", "vod", "linija", "stanica",
                                            "elektrana", "elektrogradnja", "izgradnja", "gradjevinsko", "grejanje", "grejno", "gorivo", "goriva", "pelet", "panel", "polica", "stolica", "bakarni", "bronzani",
                                            "centrala", "obezbeđenje", "klimatizacija", "klimatizacioni", "ventilacija", "ventilacioni", "gorionik", "vatra", "voda", "cev", "proizvod", "proizvodni", "laser", "proizvodnja", "lasersko", "sečenje", "plazma", "merdevine", "čunak", "štednjak", "radijator", "elektro", "induktivno", "transformator", "transformatorska", "dalekovod", "elektrovod", "mašina", "šinski", "voz", "nadzemno", "visokogradnja", "podzemno", "transport", "prevoz", "izolacija", "plastika", "guma", "štender",
                                            "vitrina", "zamrzivač", "protivpožarna", "zaštita", "prodajna", "kontaktirajte", "kontakt", "kontakti", "telefon", "svetlo", "rasveta", "javna", "kompanija", "firma", "preduzeće", "društvo", "izvoz", "sto", "radni", "snaga", "napon", "krovni", "krov", "konstrukcioni", "konstruisanje", "tehničko", "tehnika", "zaposleni", "radnici", "reference", "kupci", "prodajni", "prodaja", "razvojni", "razvoj", "industrijski", "snabdevanje", "kućni", "nameštaj", "kancelarijski", "prostor", "podno", "pekara", "hleb", "pica", "peći", "pećnica", "žardinjera", "ograda", "čelična", "čelik", "galanterija", "stepenice", "nadvožnjak", "pešački", "saobraćajni", "znak", "tabla", "bilbord", "reklamni", "redni", "fluid", "hlađenje", "zagrevanje", "sagorevanje", "čvrsto", "pirolitički", "parni", "dim", "pepeo", "dopremanje", "čišćenje", "održavanje", "inoks", "inoksni", "inoksa", "razmenjivač", "toplote" };

            // Part 1: n-gram description of every word, overlap line first, ordinal line second.
            StringBuilder ngramLines = new StringBuilder();

            for (int w = 0; w < words.Length; w++)
            {
                ngramLines.AppendLine(wordAnalysisTools.getNGramsDescriptiveLine(words[w], 2, nGramsModeEnum.overlap));
                ngramLines.AppendLine(wordAnalysisTools.getNGramsDescriptiveLine(words[w], 2, nGramsModeEnum.ordinal));
            }

            String ngramsPath = folder.pathFor("ngrams.txt", imbSCI.Data.enums.getWritableFileMode.autoRenameThis, "ngrams");
            File.WriteAllText(ngramsPath, ngramLines.ToString());

            // Part 2: same component settings, one run per similarity equation.
            wordSimilarityComponent component = new wordSimilarityComponent
            {
                N = 2,
                gramConstruction = nGramsModeEnum.overlap,
                treshold = 0.6
            };

            nGramsSimilarityEquationEnum[] equations =
            {
                nGramsSimilarityEquationEnum.DiceCoefficient,
                nGramsSimilarityEquationEnum.JaccardIndex,
                nGramsSimilarityEquationEnum.continualOverlapRatio
            };
            String[] resultFiles = { "result01.txt", "result02.txt", "result03.txt" };

            for (int e = 0; e < equations.Length; e++)
            {
                component.equation = equations[e];

                var result = component.GetResult(words);

                String resultPath = folder.pathFor(resultFiles[e], imbSCI.Data.enums.getWritableFileMode.autoRenameThis, "TestNGrams", false);
                File.WriteAllText(resultPath, result.ToString());
            }
        }
        /// <summary>
        /// Returns the child collection registered under the given directory path, creating it first when missing.
        /// </summary>
        /// <remarks>
        /// Leading and trailing backslashes are trimmed from the path before it is used as the key.
        /// A newly created child inherits <c>datasetName</c> and gets its own folder under <c>rootFolder</c>.
        /// </remarks>
        /// <param name="subCollectionDirPath">Directory path that identifies the child collection.</param>
        /// <returns>The existing or newly created child collection.</returns>
        public classificationReportCollection AddOrGetChild(String subCollectionDirPath)
        {
            String key = subCollectionDirPath.Trim('\\');

            bool alreadyKnown = Children.ContainsKey(key);
            if (!alreadyKnown)
            {
                // Register the child first, then configure it.
                var created = new classificationReportCollection(key);
                Children.Add(key, created);

                created.datasetName = datasetName;
                created.rootFolder  = rootFolder.Add(key, created.name, created.description);
            }

            return Children[key];
        }
        /// <summary>
        /// Determines the filename for the instance and, in subdirectory mode, makes sure the instance has a
        /// folder under <paramref name="parentFolder"/> whose name matches the instance name.
        /// </summary>
        /// <param name="instance">Data structure whose folder and filename are being resolved.</param>
        /// <param name="parentFolder">Folder under which the instance folder is created.</param>
        /// <param name="output">Log passed to the error reporter.</param>
        /// <returns>
        /// The filename built from the type name and the format extension in subdirectory mode; an empty string
        /// in every other case, including when an exception was caught and reported.
        /// </returns>
        private String GetFilenameAndSetInstanceFolder(IFileDataStructure instance, folderNode parentFolder = null, ILogBuilder output = null)
        {
            String filename = "";

            try
            {
                var activeMode = mode;

                if (activeMode == fileStructureMode.subdirectory)
                {
                    // A folder is (re)created when it is missing or named differently than the instance.
                    bool needsFolder = instance.folder == null || instance.folder.name != instance.name;
                    if (needsFolder)
                    {
                        instance.folder = parentFolder.Add(instance.name, instance.name, "Directory for [" + instance.GetType().Name + "]. " + instance.description);
                    }

                    filename = type.Name.getCleanPropertyName().add(formatMode.GetExtension(), ".");
                }
                else if (activeMode == fileStructureMode.none)
                {
                    fileDataStructureExtensions.FileDataStructureError("Can't have File Data Structure loaded if no file structure mode specified", parentFolder, output, null, instance);
                }
            }
            catch (Exception ex)
            {
                fileDataStructureExtensions.FileDataStructureError("SaveDataStructure failed at designating folder and filename: " + ex.Message, parentFolder, output, ex, instance);
            }

            return filename;
        }
Esempio n. 12
0
        /// <summary>
        /// Initializes a new instance of the <see cref="experimentCompositeTemplate"/> class and creates its folder structure.
        /// </summary>
        /// <remarks>
        /// Adds a "CompositeTemplates" folder under <paramref name="parentFolder"/> with sub-folders for classifier sets,
        /// feature vector extractors and experiment shells, runs <c>checkDefaults()</c> and generates the readme files.
        /// </remarks>
        /// <param name="parentFolder">The parent folder.</param>
        public experimentCompositeTemplate(folderNode parentFolder)
        {
            folder = parentFolder.Add("CompositeTemplates", "Composite Experiment Templates", "Directory with template objects for separate aspects of the experiment");

            folderForClassifierSets = folder.Add("Classifiers", "Classifier sets", "Directory with serialized templates on classifier sets");

            // Caption fixed: it was copy-pasted as "Classifier sets" from the folder above.
            folderForFeatureVectorExtractors = folder.Add("FeatureVectorExtractor", "Feature Vector Extractors", "Directory with serialized templates of Feature Vector Extractors");

            folderForExperimentShells = folder.Add("Shells", "Experiment Shells", "Directory with shells - experiment settings outside classifiers and FVEs");

            checkDefaults();

            folder.generateReadmeFiles(appManager.AppInfo);
        }
Esempio n. 13
0
        /// <summary>
        /// Loads the data source table and resolves the attachments folder, each only when configured.
        /// </summary>
        /// <param name="loger">Not used by this method.</param>
        /// <param name="workspace">Workspace folder passed to the table deserializer and used as parent of the attachments folder.</param>
        public void deploy(ILogBuilder loger, folderNode workspace)
        {
            bool hasDataSource = !dataSourcePath.isNullOrEmpty();
            if (hasDataSource)
            {
                // The table is read in the Excel export format.
                dataSource = imbDataTableExtensions.deserializeDataTable(dataSourcePath, imbSCI.Data.enums.reporting.dataTableExportEnum.excel, workspace);
            }

            bool hasAttachmentsFolder = !folderWithAttachmentsName.isNullOrEmpty();
            if (hasAttachmentsFolder)
            {
                folderWithAttachments = workspace.Add(folderWithAttachmentsName, "Attachments", "Folder with attachment files, to be send via email");
            }
        }
Esempio n. 14
0
        /// <summary>
        /// Loads "experimentTest2.xml" from the "diagnostic" folder and checks that its description is "MOD".
        /// </summary>
        public void TestModificationLoad()
        {
            folderNode diagnosticFolder = new folderNode().Add("diagnostic", "Diagnostic", "Directory used for testing");
            var loadLog = new builderForLog();

            String setupPath = diagnosticFolder.pathFor("experimentTest2" + ".xml");

            var loaded = objectSerialization.loadObjectFromXML <experimentSetup>(setupPath, loadLog);

            Assert.AreEqual("MOD", loaded.description);
        }
Esempio n. 15
0
        /// <summary>
        /// Makes sure <c>cacheFolder</c> is set, creating a "cache" sub-folder of <paramref name="folder"/> when it is not.
        /// </summary>
        /// <remarks>
        /// Despite the name, this method only clears <c>reportFolder</c> (on reselect); it never assigns it.
        /// </remarks>
        /// <param name="folder">Folder under which the cache folder is created.</param>
        /// <param name="reselect">When <c>true</c>, both folders are reset first, which forces the cache folder to be re-created.</param>
        public void SetReportAndCacheFolder(folderNode folder, Boolean reselect = false)
        {
            if (reselect)
            {
                reportFolder = null;
                cacheFolder  = null;
            }

            // Keep an already selected cache folder.
            if (cacheFolder != null)
            {
                return;
            }

            cacheFolder = folder.Add("cache", "Cached content", "Folder with cached lexic resouce partition, used during the experiment");
        }
Esempio n. 16
0
        /// <summary>
        /// Creates the main (root) wrapper model node and its folder.
        /// </summary>
        /// <param name="parentFolder">Folder under which the model's own folder is added.</param>
        /// <param name="_name">Name of the model; also the name of its folder.</param>
        /// <param name="_description">
        /// Description of the model; a default text is used when it is null or empty. The folder description
        /// always uses the value as given, not the default.
        /// </param>
        protected WrapperModelGraph(folderNode parentFolder, String _name, String _description)
        {
            name        = _name;
            description = _description.isNullOrEmpty() ? "primary wrapper node - the root" : _description;
            type        = WrapperTypeEnum.rootModel;

            String folderCaption     = "Wrapper model " + _name;
            String folderDescription = "Directory with content harvested by WrapperModel [" + name + "]. " + _description;

            _folder = parentFolder.Add(name, folderCaption, folderDescription);
        }
        /// <summary>
        /// Links a validation case to this k-fold master and creates its folder structure.
        /// </summary>
        /// <remarks>
        /// The case folder is created under <paramref name="folderOverride"/> when one is given, otherwise under
        /// this master's <c>folder</c>. The original computed the effective folder but then ignored it, so the
        /// override never took effect.
        /// </remarks>
        /// <param name="valCase">Validation case to deploy.</param>
        /// <param name="folderOverride">Optional parent folder that replaces this master's folder.</param>
        public void DeployCase(kFoldValidationCase valCase, folderNode folderOverride = null)
        {
            var fl = folder;

            if (folderOverride != null)
            {
                fl = folderOverride;
            }

            valCase.kFoldMaster = this;
            valCase.trainingCases.kFoldMaster   = this;
            valCase.evaluationCases.kFoldMaster = this;
            valCase.evaluationCases.kFoldCase   = valCase;
            valCase.trainingCases.kFoldCase     = valCase;

            // Use the effective parent folder (override or default).
            valCase.folder           = fl.Add(valCase.name, valCase.name, "Operational files and reports for k-fold [" + valCase.name + "]");
            valCase.caseFolder       = valCase.folder.Add("cases", "Cases", "Repository with knowledge on cases");
            valCase.caseSampleFolder = valCase.caseFolder.Add("microAnalysis", "Micro-analysis of FV Extraction", "Randomly picked cases for Micro-analysis of FV Extraction - similarity computation between a case and a category");
        }
        /// <summary>
        /// Saves this category into the given directory and recursively saves each subcategory into a sub-folder named after it.
        /// </summary>
        /// <remarks>
        /// Always writes the site list file. With <c>saveAggregate</c> it also writes the aggregate site list.
        /// For the root category only, it optionally writes a DGML graph (<c>saveGraphAtRoot</c>) and readme files (<c>saveReadmeFile</c>).
        /// </remarks>
        /// <param name="pathToSave">Directory path to save into.</param>
        /// <param name="options">Flags that select the optional outputs.</param>
        /// <param name="logger">Log passed to the domain list builder and to the subcategory saves.</param>
        public void Save(String pathToSave, WebDomainCategoryFormatOptions options = WebDomainCategoryFormatOptions.saveReadmeFile | WebDomainCategoryFormatOptions.saveAggregate | WebDomainCategoryFormatOptions.normalizeDomainname, ILogBuilder logger = null)
        {
            folderNode folder = new DirectoryInfo(pathToSave);

            bool saveAggregate = options.HasFlag(WebDomainCategoryFormatOptions.saveAggregate);

            String ownDomains = GetDomainList(options, logger);
            String siteListPath = folder.pathFor(categorySiteList, imbSCI.Data.enums.getWritableFileMode.overwrite, "Web sites at this category", true);
            File.WriteAllText(siteListPath, ownDomains);

            StringBuilder aggregate = new StringBuilder();

            foreach (WebDomainCategory child in this)
            {
                var childFolder = folder.Add(child.name, child.name, "Subcategory");
                child.Save(childFolder.path, options, logger);

                if (saveAggregate)
                {
                    // NOTE(review): GetSites is called on this category, not on the child, once per child — confirm this is intended.
                    foreach (string site in GetSites(100))
                    {
                        aggregate.AppendLine(site);
                    }
                    //sb.AppendLine(GetDomainList(options, logger));
                }
            }

            if (saveAggregate)
            {
                String aggregatePath = folder.pathFor(categoryAggregateSiteList, imbSCI.Data.enums.getWritableFileMode.overwrite, "Web sites at this category, including subcategories", true);
                File.WriteAllText(aggregatePath, aggregate.ToString());
            }

            // The remaining outputs are produced once, by the root category.
            if (this.root != this)
            {
                return;
            }

            if (options.HasFlag(WebDomainCategoryFormatOptions.saveGraphAtRoot))
            {
                var dgml = GraphConverters.DataSetDomainGraphConverter.Convert(this, 300);
                dgml.Save(folder.pathFor("dataset.dgml", imbSCI.Data.enums.getWritableFileMode.overwrite, "DirectedGraphMarkupLanguage representation of categories", true), imbSCI.Data.enums.getWritableFileMode.overwrite);
            }

            if (options.HasFlag(WebDomainCategoryFormatOptions.saveReadmeFile))
            {
                folder.generateReadmeFiles(imbACE.Core.appManager.AppInfo);
            }
        }
Esempio n. 19
0
        /// <summary>
        /// Creates the help builder context for the given scope object and sets up its output and resources folders.
        /// </summary>
        /// <param name="_configuration">Configuration that supplies the output and resources paths.</param>
        /// <param name="_scope">Object whose type is used as the help scope.</param>
        /// <param name="_filename">Output filename; "help" is used when null or empty.</param>
        public helpBuilderContext(helpBuilderConfiguration _configuration, Object _scope, string _filename = "")
        {
            configuration = _configuration;
            scope         = _scope;
            scopeType     = scope.GetType();
            scopeEntry    = new settingsMemberInfoEntry(scopeType);

            filename = _filename.isNullOrEmpty() ? "help" : _filename;

            // The output goes into a sub-folder of the help root named after the scope type.
            String scopeName = scopeType.Name;
            folder = new folderNode(configuration.outputPath, "Help", "Generated help content");
            folder = folder.Add(scopeName, scopeName, "Help output for [" + scopeName + "]");

            resourcesFolder = new folderNode(configuration.resourcesPath, "Help resources", "Content resources for help generation");
        }
        /// <summary>
        /// Publishes this cluster into its own sub-folder and writes a <c>report.txt</c> describing it.
        /// </summary>
        /// <remarks>
        /// The report lists the name, item count, seed label (when a seed exists), the range values and, for every
        /// non-seed item that has a score, its label, score, file path and URL.
        /// </remarks>
        /// <param name="labelsByDocument">Label for each document node.</param>
        /// <param name="documentNodeDictionary">Source information for each document node.</param>
        /// <param name="folderWithResults">Folder under which the cluster folder is created.</param>
        /// <param name="result">Similarity result that publishes the cluster items.</param>
        public void Publish(Dictionary <HtmlNode, String> labelsByDocument, Dictionary <HtmlNode, HtmlSourceAndUrl> documentNodeDictionary, folderNode folderWithResults, DocumentSimilarityResult result)
        {
            folderNode clusterFolder = folderWithResults.Add(this.name, this.name, "Directory for cluster " + this.name);

            result.Publish(documentNodeDictionary, clusterFolder, this.items);

            builderForText report = new builderForText();

            // Header: name, size, seed and range values.
            report.AppendHeading("Name: " + this.name);
            report.AppendPair("Items", this.items.Count);

            if (this.ClusterSeed != null)
            {
                report.AppendPair("Seed", labelsByDocument[this.ClusterSeed]);
            }

            foreach (var rangeEntry in this.range.GetDictionary())
            {
                report.AppendPair(rangeEntry.Key, rangeEntry.Value.ToString("F3"));
            }

            // Body: one section per scored item, the seed excluded.
            foreach (var member in this.items)
            {
                if (member == this.ClusterSeed || !this.scoreDictionary.ContainsKey(member))
                {
                    continue;
                }

                String           memberLabel  = labelsByDocument[member];
                Double           memberScore  = this.scoreDictionary[member];
                HtmlSourceAndUrl memberSource = documentNodeDictionary[member];

                report.AppendLine("-----------------------------------");
                report.AppendLine(memberLabel + " => " + memberScore.ToString("F3"));
                report.AppendLine("Filepath: " + memberSource.filepath);
                report.AppendLine("Url: " + memberSource.url);
            }

            String reportPath = clusterFolder.pathFor("report.txt", imbSCI.Data.enums.getWritableFileMode.overwrite);
            File.WriteAllText(reportPath, report.GetContent());
        }
Esempio n. 21
0
        /// <summary>
        /// Saves a default experiment setup to XML in the "diagnostic" folder, loads it back and checks that
        /// the name, description and number of semantic feature vector extractors are unchanged.
        /// </summary>
        public void TestExperimentSetupLoadSave()
        {
            folderNode diagnosticFolder = new folderNode().Add("diagnostic", "Diagnostic", "Directory used for testing");
            var loadLog = new builderForLog();

            var original = experimentSetup.GetDefaultExperimentSetup();
            original.name        = "experimentTest";
            original.description = "testing experiment load and save";

            String setupPath = diagnosticFolder.pathFor(original.name + ".xml");

            // Save, then load from the same path.
            objectSerialization.saveObjectToXML(original, setupPath);
            var reloaded = objectSerialization.loadObjectFromXML <experimentSetup>(setupPath, loadLog);

            Assert.AreEqual(original.name, reloaded.name);
            Assert.AreEqual(original.description, reloaded.description);
            Assert.AreEqual(original.featureVectorExtractors_semantic.Count, reloaded.featureVectorExtractors_semantic.Count);
        }
        /// <summary>
        /// Saves every web site of a category into a sub-folder named after the category, optionally writes the
        /// domain list file, and then saves the subcategories.
        /// </summary>
        /// <param name="category">Category whose sites are saved.</param>
        /// <param name="rootFolder">Folder under which the category folder is created.</param>
        /// <param name="options">Flags; <c>saveDomainList</c> controls whether the domain list file is written.</param>
        /// <param name="logger">Not used by this method.</param>
        protected void SaveWebSites(WebDocumentsCategory category, folderNode rootFolder, WebDomainCategoryFormatOptions options, ILogBuilder logger = null)
        {
            folderNode categoryFolder = rootFolder.Add(category.name, category.name, category.description);
            StringBuilder domains = new StringBuilder();

            foreach (WebSiteDocuments webSite in category.siteDocuments)
            {
                domains.AppendLine(webSite.domain);
                SaveWebSite(webSite, categoryFolder);

                /*
                 * foreach (WebSiteDocument page in site.documents)
                 * {
                 *
                 *  String filename = site.domain.add(page.path, "/");
                 *  filename = filename.Replace("//", "/");
                 *  filename = "http://" + filename;
                 *  filename = GetFilenameFromURLPath(filename);
                 *  filename = WebSiteDocumentsSetTools.GetSafeFilename(filename);
                 *
                 *  String p = folder.pathFor(filename, imbSCI.Data.enums.getWritableFileMode.existing, "Page of [" + site.domain + "] at path [" + page.path + "]", false);
                 *
                 *  String source = GetWebDocumentSource(page);
                 *  if (!File.Exists(p))
                 *  {
                 *      File.WriteAllText(p, source);
                 *  }
                 * }*/
            }

            bool writeDomainList = options.HasFlag(WebDomainCategoryFormatOptions.saveDomainList);
            if (writeDomainList)
            {
                String domainListPath = categoryFolder.pathFor(WebDomainCategory.categorySiteList, imbSCI.Data.enums.getWritableFileMode.overwrite, "Domains in category [" + category.path + "]", true);
                File.WriteAllText(domainListPath, domains.ToString());
            }

            SaveSubcategories(category, categoryFolder, options);
        }
 /// <summary>
 /// Creates the execution context for a harvester project and unit, with an output folder named after the project.
 /// </summary>
 /// <param name="_project">Project being executed; its name and description are used for the output folder.</param>
 /// <param name="_unit">Unit being executed.</param>
 /// <param name="_parentFolder">Folder under which the project output folder is added.</param>
 public HarvesterExecutionContext(HarvesterProject _project, HarvesterUnit _unit, folderNode _parentFolder)
 {
     this.project = _project;
     this.unit    = _unit;

     string outputCaption = _project.name + " output";
     this.folder = _parentFolder.Add(_project.name, outputCaption, _project.description);
 }
Esempio n. 24
0
        /// <summary>
        /// Runs the lemma semantic weaver over every "*_initialCloud.xml" file in the "Clouds" folder, once per
        /// similarity equation (Dice coefficient, Jaccard index, continual overlap ratio), and saves the reports.
        /// </summary>
        /// <remarks>
        /// The first pass also saves the initial graph of each cloud; the last pass also saves the processed cloud
        /// as XML and its graph as DGML. Readme files are generated for the root folder at the end.
        /// </remarks>
        public void TestCloudWeaver()
        {
            folderNode root = new folderNode();

            folderNode outputFolder = root.Add("NLP\\CloudWeaver", "Cloud Weaver", "Folder with results of cloud weaver tests");
            folderNode inputFolder  = root.Add("Clouds", "Test resources", "");
            folderNode resources    = root.Add("resources", "Test resources", "");

            lemmaSemanticWeaver weaver = new lemmaSemanticWeaver();
            weaver.prepare(resources, null);

            // Similarity on bigrams, dictionary switched off.
            weaver.useSimilarity  = true;
            weaver.similarWords.N = 2;
            weaver.similarWords.gramConstruction = nGramsModeEnum.overlap;
            weaver.similarWords.treshold         = 0.6;
            weaver.similarWords.equation         = nGramsSimilarityEquationEnum.DiceCoefficient;
            weaver.useDictionary = false;

            var cloudFiles = inputFolder.findFiles("*_initialCloud.xml", SearchOption.TopDirectoryOnly);

            // Pass 1: Dice coefficient; the untouched cloud graph is saved first.
            foreach (String cloudFile in cloudFiles)
            {
                lemmaSemanticCloud cloud = lemmaSemanticCloud.Load <lemmaSemanticCloud>(cloudFile);

                String initialGraphPath = outputFolder.pathFor(cloud.className + "_initial.dgml", imbSCI.Data.enums.getWritableFileMode.overwrite);
                cloud.GetSimpleGraph(false).Save(initialGraphPath, imbSCI.Data.enums.getWritableFileMode.overwrite);

                var diceReport = weaver.Process(cloud, null);
                diceReport.Save(outputFolder, "DiceCoefficient");
            }

            // Pass 2: Jaccard index.
            weaver.similarWords.equation = nGramsSimilarityEquationEnum.JaccardIndex;

            foreach (String cloudFile in cloudFiles)
            {
                lemmaSemanticCloud cloud = lemmaSemanticCloud.Load <lemmaSemanticCloud>(cloudFile);

                var jaccardReport = weaver.Process(cloud, null);
                jaccardReport.Save(outputFolder, "JaccardIndex");
            }

            // Pass 3: continual overlap ratio; the processed cloud and its graph are saved as well.
            weaver.similarWords.equation = nGramsSimilarityEquationEnum.continualOverlapRatio;

            foreach (String cloudFile in cloudFiles)
            {
                lemmaSemanticCloud cloud = lemmaSemanticCloud.Load <lemmaSemanticCloud>(cloudFile);

                var overlapReport = weaver.Process(cloud, null);
                overlapReport.Save(outputFolder, "ContinualOverlap");

                String weavedXmlPath = outputFolder.pathFor(cloud.className + "_weaved.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Processed cloud");
                objectSerialization.saveObjectToXML(cloud, weavedXmlPath);

                String weavedGraphPath = outputFolder.pathFor(cloud.className + "_weaved.dgml", imbSCI.Data.enums.getWritableFileMode.overwrite);
                cloud.GetSimpleGraph(false).Save(weavedGraphPath, imbSCI.Data.enums.getWritableFileMode.overwrite);
            }

            //weaver.similarWords.equation = nGramsSimilarityEquationEnum.continualOverlapRatio;
            //weaver.useDictionary = true;

            //foreach (String path in cloudPaths)
            //{
            //    lemmaSemanticCloud testCloud = lemmaSemanticCloud.Load<lemmaSemanticCloud>(path);

            //    var report = weaver.Process(testCloud, null);
            //    report.Save(weaverFolder, "JaccardIndexAndApertium");
            //}

            root.generateReadmeFiles(new imbSCI.Core.data.aceAuthorNotation());
        }
        /// <summary>
        /// Creates the report table version of <c>source</c> and saves the report into the specified <c>folder</c>.
        /// </summary>
        /// <remarks>
        /// When <c>allowAsyncCall</c> is <c>true</c> and the global setting <c>DataTableReports_AsyncExportCalls</c> is enabled,
        /// the export is handed over to a <c>DataTableForStatisticsExportJob</c> started on a new thread and this method
        /// returns <c>null</c> immediately. Otherwise the export runs on the calling thread: optional XML / CSV / column-info
        /// side files are written into a sub folder (depending on the AUTOSAVE flags and settings), then the report table is saved.
        /// </remarks>
        /// <param name="source">The table to create the report for.</param>
        /// <param name="folder">The folder that receives the report files.</param>
        /// <param name="notation">Author notation passed to the export calls; a default instance is created when <c>null</c>.</param>
        /// <param name="filenamePrefix">The filename prefix.</param>
        /// <param name="disablePrimaryKey">Passed to <c>GetReportTableVersion</c> when <c>source</c> is not already a <c>DataTableForStatistics</c>.</param>
        /// <param name="allowAsyncCall">if set to <c>true</c>, the export may be started on a separate thread (subject to the global setting).</param>
        /// <returns>The saved report table, or <c>null</c> when the export was started asynchronously.</returns>
        /// <exception cref="ArgumentNullException"><c>folder</c> is <c>null</c> (checked on the synchronous path only).</exception>
        public static DataTableForStatistics GetReportAndSave(this DataTable source, folderNode folder, aceAuthorNotation notation = null, string filenamePrefix = "", bool disablePrimaryKey = true, Boolean allowAsyncCall = false)
        {
            if (notation == null)
            {
                notation = new aceAuthorNotation();
            }

            if (allowAsyncCall)
            {
                if (imbSCI.Core.config.imbSCICoreConfig.settings.DataTableReports_AsyncExportCalls)
                {
                    // Fire-and-forget: the job performs the export on its own thread, so there is no table to return here.
                    DataTableForStatisticsExportJob job = new DataTableForStatisticsExportJob(source, folder, notation, filenamePrefix, disablePrimaryKey);
                    Thread t = new Thread(job.Do);
                    t.Start();
                    return(null);
                }
            }

            if (folder == null)
            {
                // The single-string ArgumentNullException constructor interprets its argument as the parameter name,
                // so the parameter name and the diagnostic message are passed separately. The table name is read
                // defensively: a null source must not hide this error behind a NullReferenceException.
                string tableName = (source == null) ? "null" : source.TableName;
                throw new ArgumentNullException("folder", "Folder is null! at GetReportAndSave() for [" + tableName + "] at filename [" + filenamePrefix + "]");
            }

            if (source.Columns.Count > 0)
            {
                // The extra folder is created only when at least one of the side exports below will write into it.
                folderNode dataFolder = null;
                if (DataTableForStatistics.AUTOSAVE_CleanDataTable || DataTableForStatistics.AUTOSAVE_FieldsText || imbSCI.Core.config.imbSCICoreConfig.settings.DataTableReports_DoExportXMLData)
                {
                    dataFolder = folder.Add(EXTRAFOLDER, "Excel report meta data", "Folder containing clean data export (single header row, CSV format) for easier use by other software platforms and/or column meta descriptions - additional information - in separate txt file for each Excel report created.");
                }

                if (imbSCI.Core.config.imbSCICoreConfig.settings.DataTableReports_DoExportXMLData)
                {
                    try
                    {
                        String xmlCode = objectSerialization.ObjectToXML(source);
                        xmlCode.saveStringToFile(dataFolder.pathFor(source.TableName.getFilename(".xml"), getWritableFileMode.overwrite, "XML Serialized DataTable [" + source.GetTitle() + "]", true));
                    }
                    catch (Exception ex)
                    {
                        // Best effort: a failed XML export is recorded on the table instead of aborting the report.
                        source.SetAdditionalInfoEntry("XML data", "Serialization failed: " + ex.Message);
                    }
                }

                if (DataTableForStatistics.AUTOSAVE_CleanDataTable)
                {
                    string cld = source.serializeDataTable(dataTableExportEnum.csv, PREFIX_CLEANDATATABLE + filenamePrefix.getFilename() + ".csv", dataFolder, notation);
                    source.SetAdditionalInfoEntry("Clean data", cld);
                }

                if (DataTableForStatistics.AUTOSAVE_FieldsText)
                {
                    string cli = dataFolder.pathFor(PREFIX_COLUMNINFO + filenamePrefix.getFilename() + ".txt");
                    source.GetUserManualForTableSaved(cli);
                    source.SetAdditionalInfoEntry("Column info", cli);
                }

                if (tableReportCreation_insertFilePathToTableExtra)
                {
                    // NOTE(review): intentionally empty in the original - looks like a placeholder for a feature that was never implemented.
                }
            }

            // Reuse the source when it already is a report table; otherwise build the report version of it.
            DataTableForStatistics output = source as DataTableForStatistics;
            if (output == null)
            {
                output = source.GetReportTableVersion(disablePrimaryKey);
            }

            output.Save(folder, notation, filenamePrefix);

            return(output);
        }
        /// <summary>
        /// Writes the complete comparison report for the two datasets into a sub folder (named <c>runName</c>) of <c>folder</c>:
        /// token lists, domain lists, content analysis tables, per-category and aggregate HTML tag tables,
        /// the comparison metrics table and, finally, the readme files of the folder.
        /// </summary>
        /// <param name="folder">Parent folder; the report sub folder is added to it.</param>
        /// <param name="datasetA_name">Name used for dataset A in file names and descriptions.</param>
        /// <param name="datasetB_name">Name used for dataset B in file names and descriptions.</param>
        /// <param name="runName">Name of the report sub folder.</param>
        public void FullReport(folderNode folder, String datasetA_name = "A", String datasetB_name = "B", String runName = "DataSets")
        {
            var overwrite = imbSCI.Data.enums.getWritableFileMode.overwrite;

            folder = folder.Add(runName, runName, "Reports on datasets [" + datasetA_name + "," + datasetB_name + "]");

            // ---- term lists: resolve all paths first, then write the files
            String termsPathA      = folder.pathFor("tokens_" + datasetA_name + ".txt", overwrite, "List of all unique terms in the dataset [" + datasetA_name + "]");
            String termsPathB      = folder.pathFor("tokens_" + datasetB_name + ".txt", overwrite, "List of all unique terms in the dataset [" + datasetB_name + "]");
            String termsPathCommon = folder.pathFor("tokens_common.txt", overwrite, "List of all unique terms, existing in both datasets");

            String termsPathOnlyA = folder.pathFor("tokens_" + datasetA_name + "_specific.txt", overwrite, "List of all unique terms, being spcific to the dataset [" + datasetA_name + "], and not part of the other dataset");
            String termsPathOnlyB = folder.pathFor("tokens_" + datasetB_name + "_specific.txt", overwrite, "List of all unique terms, being spcific to the dataset [" + datasetB_name + "], and not part of the other dataset");

            File.WriteAllText(termsPathA, tknA.toCsvInLine(","));
            File.WriteAllText(termsPathB, tknB.toCsvInLine(","));
            File.WriteAllText(termsPathCommon, tknC.toCsvInLine(","));

            File.WriteAllText(termsPathOnlyA, tknA_u.toCsvInLine(","));
            File.WriteAllText(termsPathOnlyB, tknB_u.toCsvInLine(","));

            // ---- domain lists: same pattern, one line per domain
            String domainsPathOnlyA  = folder.pathFor("domains_" + datasetA_name + "_specific.txt", overwrite, "List of all domains, being unique for the dataset [" + datasetA_name + "]");
            String domainsPathOnlyB  = folder.pathFor("domains_" + datasetB_name + "_specific.txt", overwrite, "List of all domains, being unique for the dataset [" + datasetB_name + "]");
            String domainsPathCommon = folder.pathFor("domains_common.txt", overwrite, "List of all domains, being common to both datasets");
            String domainsPathA      = folder.pathFor("domains_" + datasetA_name + ".txt", overwrite, "List of all domains in the dataset");
            String domainsPathB      = folder.pathFor("domains_" + datasetB_name + ".txt", overwrite, "List of all domains in the dataset");

            File.WriteAllLines(domainsPathOnlyA, DocumentSetsUniqueForA);
            File.WriteAllLines(domainsPathOnlyB, DocumentSetsUniqueForB);
            File.WriteAllLines(domainsPathCommon, DocumentSetsInCommonByName);

            File.WriteAllLines(domainsPathA, analyticA.domains);
            File.WriteAllLines(domainsPathB, analyticB.domains);

            // ---- content analysis tables
            analyticA.GetDataTable(datasetA_name + "_contentAnalysis").GetReportAndSave(folder, new imbSCI.Core.data.aceAuthorNotation());
            analyticB.GetDataTable(datasetB_name + "_contentAnalysis").GetReportAndSave(folder, new imbSCI.Core.data.aceAuthorNotation());

            // ---- HTML tag statistics of dataset A: one table per category, then the aggregate
            foreach (var entry in analyticA.categoryNameVsHtmlTag)
            {
                var categoryTable = entry.Value.GetDataTable(datasetA_name + "_" + entry.Key + "_htmlTag", entry.Value.description);
                categoryTable.GetReportAndSave(folder, new imbSCI.Core.data.aceAuthorNotation());
            }

            var aggregateTableA = analyticA.GlobalCategoryTree.GetDataTable(datasetA_name + "_htmlTag_all", "Aggregate HTML tags statistics");
            aggregateTableA.GetReportAndSave(folder, new imbSCI.Core.data.aceAuthorNotation());

            // ---- HTML tag statistics of dataset B: one table per category, then the aggregate
            foreach (var entry in analyticB.categoryNameVsHtmlTag)
            {
                var categoryTable = entry.Value.GetDataTable(datasetB_name + "_" + entry.Key + "_htmlTag", entry.Value.description);
                categoryTable.GetReportAndSave(folder, new imbSCI.Core.data.aceAuthorNotation());
            }

            var aggregateTableB = analyticB.GlobalCategoryTree.GetDataTable(datasetB_name + "_htmlTag_all", "Aggregate HTML tags statistics");
            aggregateTableB.GetReportAndSave(folder, new imbSCI.Core.data.aceAuthorNotation());

            // ---- comparison metrics and folder readme
            GetPCE().getDataTable().Save(folder, new imbSCI.Core.data.aceAuthorNotation(), "ComparisonMetrics");
            folder.generateReadmeFiles(new imbSCI.Core.data.aceAuthorNotation());
        }
Esempio n. 27
0
        /// <summary>
        /// Prepares the mining context for parallel execution: runs the pipeline for every class, collects the web site
        /// subjects and their content tokens, builds the per-class lemma and chunk tables and - depending on the
        /// <c>tools.operation</c> flags - writes the diagnostic reports into the "General" folder of the experiment.
        /// </summary>
        /// <remarks>
        /// When <c>items</c> already has content the method only logs a note and returns the existing knowledge set.
        /// </remarks>
        /// <param name="tools">The tools.</param>
        /// <param name="_context">The experiment execution context; stored as the current <c>experimentContext</c>.</param>
        /// <returns>The case knowledge set, holding one <c>webCaseKnowledge</c> per web site subject found by the pipeline.</returns>
        public webProjectKnowledgeSet PrepareForParallelExecution(classifierTools tools, experimentExecutionContext _context)
        {
            if (caseKnowledgeSet == null)
            {
                caseKnowledgeSet = new webProjectKnowledgeSet();
            }

            if (items.Any())
            {
                // experimentContext is assigned only further below, so fall back to the supplied context
                // when this early exit is reached before any context was stored.
                (experimentContext ?? _context).notes.log("Mining Context was ready already.");
                return(caseKnowledgeSet);
            }
            DateTime startTime = DateTime.Now;

            experimentContext = _context;

            List<webCaseKnowledge> cases = new List<webCaseKnowledge>();

            folderNode classReportFolder = experimentContext.folder.Add("General", "General and diagnostic reports", "The folder contains general (outside k-folds) reports on analysied industries (categories), web sites and other diagnostic data");

            // <----------------------------------------------------------------------------------------------------------------        [ performing pipeline ]
            experimentContext.notes.log("Executing the Mining Context decomposition with the pipeline model");
            foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
            {
                var pipelineContext = GetContextForPipeline(tools, classSet);
                sitesByCategory.Add(classSet, new List<pipelineTaskMCSiteSubject>());

                if (!pipelineContext.exitByType.ContainsKey(typeof(pipelineTaskMCSiteSubject)))
                {
                    throw new aceGeneralException("Pipeline context output contains no web site subjects! Check the pipeline Site Task constructor.", null, pipelineContext, "Pipeline broken");
                }

                var sitesForContext = pipelineContext.exitByType[typeof(pipelineTaskMCSiteSubject)]; // <----- preparing
                foreach (var site in sitesForContext)
                {
                    // Cast once; the same subject is registered in all three collections.
                    pipelineTaskMCSiteSubject siteSubject = site as pipelineTaskMCSiteSubject;

                    tokenBySite.Add(siteSubject, new ConcurrentBag<pipelineTaskSubjectContentToken>());
                    sitesByCategory[classSet].Add(siteSubject);

                    webCaseKnowledge webCase = new webCaseKnowledge(siteSubject, classSet);

                    caseKnowledgeSet.Add(webCase);
                    cases.Add(webCase);
                }

                semanticFVExtractorKnowledge kn = new semanticFVExtractorKnowledge();
                kn.name = classSet.name + "_general";
                kn.relatedItemPureName = classSet.name;
                kn.type = WebFVExtractorKnowledgeType.aboutCompleteCategory;
                kn.Deploy(classReportFolder, experimentContext.logger);
                knowledgeByClass.TryAdd(classSet, kn);
            }

            experimentContext.notes.log("Sorting tokens for all sites [in parallel]");
            Parallel.ForEach(tokenBySite.Keys, site =>
            {
                // Only leafs that are content tokens are collected; the bag of each site was created above.
                var leafs = site.getAllLeafs();
                foreach (var leaf in leafs)
                {
                    pipelineTaskSubjectContentToken token = leaf as pipelineTaskSubjectContentToken;
                    if (token != null)
                    {
                        tokenBySite[site].Add(token);
                    }
                }
            });

            foreach (var c in cases)
            {
                c.tokens = tokenBySite[c.MCSiteSubject];
            }

            experimentContext.notes.log("Building diagnostic TF-IDF master tables for all classes [in parallel]");

            // Hard-wired switch between the two table construction strategies; the integrated one is currently disabled.
            Boolean useIntegratedApproach = false;

            if (useIntegratedApproach)
            {
                var valCase = experimentContext.validationCollections[experimentContext.masterExtractor.name].GetDiagnosticCase(experimentContext.classes);
                Parallel.ForEach(sitesByCategory, pair =>
                {
                    knowledgeByClass.TryAdd(pair.Key, experimentContext.masterExtractor.DoFVExtractionForClassViaCases(valCase.trainingCases[pair.Key.classID], pair.Key, valCase, experimentContext.tools, experimentContext.logger));
                });
            }
            else
            {
                Parallel.ForEach(sitesByCategory, pair =>
                {
                    IDocumentSetClass category            = pair.Key;
                    List<pipelineTaskMCSiteSubject> sites = pair.Value;

                    var lt = BuildLemmaTableForClass(tools, category, sites);
                    lt.Save();
                });
            }

            experimentContext.notes.log("Saving lexic resource cache subset - for later reuse in case of repeated experiment run");
            tools.SaveCache();

            if (!useIntegratedApproach)
            {
                experimentContext.notes.log("Performing chunk construction for all web sites in all categories [in serial]");

                foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
                {
                    BuildChunksForClass(tools, classSet);
                }

                foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
                {
                    experimentContext.masterExtractor.chunkTableConstructor.process(chunksByCategory[classSet], cnt_level.mcPage, knowledgeByClass[classSet].WLChunkTableOfIndustryClass, null, experimentContext.logger, false);
                }
            }

            if (tools.operation.doCreateDiagnosticMatrixAtStart)
            {
                experimentContext.notes.log("Performing diagnostic analysis on all categories...[doCreateDiagnosticMatrixAtStart=true]");

                folderNode matrixReport = classReportFolder.Add("clouds", "More reports on semantic cloud", "Directory contains exported DirectedGraphs, varous matrix derivates, combined cloud and other diagnostic things");

                List<lemmaSemanticCloud> clouds         = new List<lemmaSemanticCloud>();
                List<lemmaSemanticCloud> filteredClouds = new List<lemmaSemanticCloud>();

                var converter = lemmaSemanticCloud.GetDGMLConverter();

                // Build the initial cloud of every class, export it, and keep a clone for the cloud matrix.
                foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
                {
                    var cloud = experimentContext.masterExtractor.CloudConstructor.process(knowledgeByClass[classSet].WLChunkTableOfIndustryClass, knowledgeByClass[classSet].WLTableOfIndustryClass, knowledgeByClass[classSet].semanticCloud, experimentContext.logger, tokenBySite.Keys.ToList(), tools.GetLemmaResource());
                    knowledgeByClass[classSet].semanticCloud.className = classSet.name;
                    clouds.Add(cloud);

                    if (experimentContext.tools.operation.doUseSimpleGraphs)
                    {
                        cloud.GetSimpleGraph(true).Save(matrixReport.pathFor("cloud_initial_" + classSet.name, imbSCI.Data.enums.getWritableFileMode.none, "Initial version of full-sample set, diagnostic Semantic Cloud for category [" + classSet.name + "]"));
                    }
                    else
                    {
                        converter.ConvertToDMGL(cloud).Save(matrixReport.pathFor("cloud_initial_" + classSet.name, imbSCI.Data.enums.getWritableFileMode.none, "Initial version of full-sample set, diagnostic Semantic Cloud for category [" + classSet.name + "]"));
                    }

                    knowledgeByClass[classSet].semanticCloudFiltered           = knowledgeByClass[classSet].semanticCloud.CloneIntoType<lemmaSemanticCloud>(true);
                    knowledgeByClass[classSet].semanticCloudFiltered.className = classSet.name;
                    filteredClouds.Add(knowledgeByClass[classSet].semanticCloudFiltered);
                }

                cloudMatrix matrix = new cloudMatrix("CloudMatrix", "Diagnostic cloud matrix created from the complete sample set of [" + clouds.Count + "] classes");
                matrix.build(filteredClouds, experimentContext.logger);

                // Unified cloud is taken twice: here, before TransformClouds, and once more after it (see below).
                lemmaSemanticCloud mergedCloudInitial = matrix.GetUnifiedCloud();
                mergedCloudInitial.Save(matrixReport.pathFor("unified_initial_cloud.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized object - Initial version of Semantic Cloud built as union of full-sample set Semantic Clouds of all categories"));

                var reductions = matrix.TransformClouds(experimentContext.masterExtractor.settings.semanticCloudFilter, experimentContext.logger);

                var p = matrixReport.pathFor("reductions_nodes.txt", imbSCI.Data.enums.getWritableFileMode.overwrite, "Report on Cloud Matrix transformation process");
                File.WriteAllLines(p, reductions);

                matrix.BuildTable(experimentContext.masterExtractor.settings.semanticCloudFilter, cloudMatrixDataTableType.initialState | cloudMatrixDataTableType.maxCloudFrequency | cloudMatrixDataTableType.absoluteValues).GetReportAndSave(matrixReport, appManager.AppInfo, "matrix_max_cf_initial", true, experimentContext.tools.operation.doReportsInParalell);

                matrix.BuildTable(experimentContext.masterExtractor.settings.semanticCloudFilter, cloudMatrixDataTableType.initialState | cloudMatrixDataTableType.overlapSize | cloudMatrixDataTableType.absoluteValues).GetReportAndSave(matrixReport, appManager.AppInfo, "matrix_overlap_size_initial", true, experimentContext.tools.operation.doReportsInParalell);

                matrix.BuildTable(experimentContext.masterExtractor.settings.semanticCloudFilter, cloudMatrixDataTableType.initialState | cloudMatrixDataTableType.overlapValue | cloudMatrixDataTableType.absoluteValues).GetReportAndSave(matrixReport, appManager.AppInfo, "matrix_overlap_value_initial", true, experimentContext.tools.operation.doReportsInParalell);

                matrix.ExportTextReports(matrixReport, true, "matrix_cf");
                matrix.ExportTextReports(matrixReport, false, "matrix_cf");

                lemmaSemanticCloud mergedCloudAfterReduction = matrix.GetUnifiedCloud();
                mergedCloudAfterReduction.Save(matrixReport.pathFor("unified_reduced_cloud.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized object -Version of all-categories diagnostic Semantic Cloud, after Cloud Matrix filter was applied"));

                if (experimentContext.tools.operation.doUseSimpleGraphs)
                {
                    mergedCloudInitial.GetSimpleGraph(true).Save(matrixReport.pathFor("unified_initial_cloud", imbSCI.Data.enums.getWritableFileMode.overwrite, "DirectedGraphML file - unified Semantic Cloud, before Cloud Matrix filter was applied - Open this in VisualStudo)"));
                }
                else
                {
                    converter = lemmaSemanticCloud.GetDGMLConverter();

                    converter.ConvertToDMGL(mergedCloudInitial).Save(matrixReport.pathFor("unified_initial_cloud", imbSCI.Data.enums.getWritableFileMode.overwrite, "DirectedGraphML file - unified Semantic Cloud, before Cloud Matrix filter was applied - Open this in VisualStudo)"));
                }

                // <-------- analysis -----------------------------------------------------------------------------------
                DataTableTypeExtended<freeGraphReport> cloudReports = new DataTableTypeExtended<freeGraphReport>();
                foreach (var cl in filteredClouds)
                {
                    freeGraphReport fgReport = new freeGraphReport(cl);
                    fgReport.Save(matrixReport);
                    cloudReports.AddRow(fgReport);
                }
                freeGraphReport unifiedReport = new freeGraphReport(mergedCloudAfterReduction);
                unifiedReport.Save(matrixReport);
                cloudReports.AddRow(unifiedReport);

                cloudReports.GetReportAndSave(matrixReport, appManager.AppInfo, "analysis_SemanticClouds");
                // <-------- analysis -----------------------------------------------------------------------------------

                // Export the filtered cloud of every class into its own file. The previous version wrote all of them
                // to "unified_initial_cloud", overwriting both the unified graph saved above and each other.
                foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
                {
                    var cloud = knowledgeByClass[classSet].semanticCloudFiltered;

                    String reducedCloudPath = matrixReport.pathFor("cloud_reduced_" + classSet.name, imbSCI.Data.enums.getWritableFileMode.overwrite, "DirectedGraphML file - filtered Semantic Cloud for category [" + classSet.name + "], after Cloud Matrix filter was applied (Open this in VisualStudio)");

                    if (experimentContext.tools.operation.doUseSimpleGraphs)
                    {
                        cloud.GetSimpleGraph(true).Save(reducedCloudPath);
                    }
                    else
                    {
                        converter = lemmaSemanticCloud.GetDGMLConverter();

                        converter.ConvertToDMGL(cloud).Save(reducedCloudPath);
                    }
                }

                // Top 100 rows (by term frequency) of the lemma and chunk tables of every class, plus the unresolved token counts.
                instanceCountCollection<String> tfcounter = new instanceCountCollection<string>();
                foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
                {
                    var wlt = knowledgeByClass[classSet].WLTableOfIndustryClass.GetDataTable();
                    wlt.DefaultView.Sort = "termFrequency desc";
                    var sorted = wlt.DefaultView.ToTable();
                    var tbl    = wlt.GetClonedShema<DataTable>(true);

                    tbl.CopyRowsFrom(sorted, 0, 100);
                    tbl.GetReportAndSave(classReportFolder, appManager.AppInfo, classSet.name + "_WebLemma", true, experimentContext.tools.operation.doReportsInParalell);

                    var cht = knowledgeByClass[classSet].WLChunkTableOfIndustryClass.GetDataTable();
                    cht.DefaultView.Sort = "termFrequency desc";
                    var csorted = cht.DefaultView.ToTable();

                    tbl = cht.GetClonedShema<DataTable>(true);
                    tbl.CopyRowsFrom(csorted, 0, 100);
                    tbl.GetReportAndSave(classReportFolder, appManager.AppInfo, classSet.name + "_Chunks", true, experimentContext.tools.operation.doReportsInParalell);

                    tfcounter.AddInstanceRange(knowledgeByClass[classSet].WLTableOfIndustryClass.unresolved);

                    knowledgeByClass[classSet].OnBeforeSave();
                }

                List<String> countSorted = tfcounter.getSorted();
                StringBuilder sb         = new StringBuilder();
                foreach (String s in countSorted)
                {
                    sb.AppendLine(String.Format("{1}  :  {0}", s, tfcounter[s]));
                }
                String pt = classReportFolder.pathFor("unresolved_tokens.txt", imbSCI.Data.enums.getWritableFileMode.none, "Cloud Frequency list of all unresolved letter-only tokens");
                File.WriteAllText(pt, sb.ToString());
            }

            if (tools.operation.doFullDiagnosticReport)
            {
                experimentContext.notes.log("Generating full diagnostic report on classes...");
                DataTable rep = null;
                foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
                {
                    rep = this.GetClassKnowledgeReport(classSet, rep);
                }

                // rep stays null when there are no classes (or no report was produced); nothing to save in that case.
                if (rep != null)
                {
                    rep.SetAdditionalInfoEntry("Experiment", experimentContext.setup.name);

                    rep.AddExtra("Experiment: " + experimentContext.setup.name);

                    rep.AddExtra("Info: " + experimentContext.setup.description);

                    rep.SetDescription("Structural report for all classes in the experiment");
                    rep.GetReportAndSave(classReportFolder, appManager.AppInfo, "structural_class_report", true, experimentContext.tools.operation.doReportsInParalell);
                }
            }

            classReportFolder.generateReadmeFiles(appManager.AppInfo);

            experimentContext.notes.log("Mining Context preprocessing done in [" + DateTime.Now.Subtract(startTime).TotalMinutes.ToString("F2") + "] minutes");
            return(caseKnowledgeSet);
        }
Esempio n. 28
0
        /// <summary>
        /// Runs the analysis for every scope enabled in <c>settings.RequiredScopes</c> whose report was not created yet
        /// (global, category, main, unitary, raw and flat-site level), saving each report as it is produced.
        /// </summary>
        /// <param name="folder">The folder that receives the report sub folders; may be <c>null</c>, in which case the reports are created without a folder and the raw-level table blocks are not published.</param>
        /// <param name="log">The log; may be <c>null</c>, in which case progress messages are skipped.</param>
        public void Analysis(folderNode folder, ILogBuilder log)
        {
            if (globalReport == null && settings.RequiredScopes.HasFlag(CWPAnalysisScopeEnum.globalLevel))
            {
                log?.log("Making global report");

                FeatureCWPAnalysisEntryReport entryReport = new FeatureCWPAnalysisEntryReport("Global", "Feature analysis for complete dataset ", folder?.Add("_global", "Global", "GlobalReport"), settings.purpose);

                log?.log("Making global dataset report");

                foreach (var term in datasetStatsModel.terms.GetTokens())
                {
                    FeatureCWPAnalysisSiteMetrics metrics = datasetStatsModel.QueryForTerm(term);
                    SetMetrics(metrics);
                    entryReport.Append(metrics, false);
                }

                entryReport.Save(log);

                globalReport = entryReport;
            }

            if (!categoryReports.Any() && settings.RequiredScopes.HasFlag(CWPAnalysisScopeEnum.categoryLevel))
            {
                // Create all category folders up front, before any category is analysed.
                if (folder != null)
                {
                    foreach (SpaceDocumentStatsModel category in datasetStatsModel.Children)
                    {
                        folder.Add(category.name, category.name, "");
                    }
                }

                log?.log("Making category level dataset reports");

                foreach (var category in datasetStatsModel.Children)
                {
                    FeatureCWPAnalysisEntryReport rp = null;

                    if (folder != null)
                    {
                        rp = SubAnalysis(category, folder[category.name], log);
                    }
                    else
                    {
                        rp = SubAnalysis(category, null, log);
                    }
                    rp.Save(log, false);

                    categoryReports.Add(rp);
                }
            }

            if (datasetReport == null && settings.RequiredScopes.HasFlag(CWPAnalysisScopeEnum.mainLevel))
            {
                log?.log("Making main level report");

                datasetReport = new FeatureCWPAnalysisDatasetReport("Dataset", "Final report on the dataset", folder?.Add("_main", "main", ""), categoryReports);
                datasetReport.Save(log);
            }

            if (unitaryReport == null && settings.RequiredScopes.HasFlag(CWPAnalysisScopeEnum.unitaryLevel))
            {
                log?.log("Making unitary report");

                unitaryReport = new FeatureCWPAnalysisEntryReport("Fusioned report", "Cross category report with MAX(particularity) and MAX(commonality)", folder?.Add("_unitary", "Unitary", ""), settings.purpose);

                // Merge the metrics of every category report into the single unitary report.
                foreach (var pair in categoryReports)
                {
                    foreach (System.Collections.Generic.KeyValuePair<string, FeatureCWPAnalysisSiteMetrics> e in pair)
                    {
                        SetMetrics(e.Value);
                        unitaryReport.AddMerge(e.Value, false);
                    }
                }

                unitaryReport.PostMerge();
                unitaryReport.Save(log, false);
            }

            if (frequencies == null && settings.RequiredScopes.HasFlag(CWPAnalysisScopeEnum.rawLevel))
            {
                log?.log("Making raw report");

                frequencies = new FeatureCWPFrequencyDictionary();
                frequencies.Deploy(datasetStatsModel);

                // Every other scope tolerates a null folder; this one used to dereference it unconditionally.
                // The frequency dictionary is still deployed, only the publishing step needs a target folder.
                if (folder != null)
                {
                    frequencies.PublishTableBlocks(folder.Add("_freq", "Frequencies", "Absolute frequencies by scope"));
                }
            }

            if (flatReport == null && settings.RequiredScopes.HasFlag(CWPAnalysisScopeEnum.flatSiteLevel))
            {
                log?.log("Making flat report");

                // The first instance only provides the "_flat" folder; the actual report comes from SubAnalysis.
                flatReport = new FeatureCWPAnalysisEntryReport("Flat report", "Report produced as if all sites are in single cateogry", folder?.Add("_flat", "Flat", ""), settings.purpose);
                flatReport = SubAnalysis(flatDataSetStatsModel, flatReport.folder, log);
                flatReport.Save(log, false);
            }
        }
        /// <summary>
        /// Annotates <paramref name="InitialGraph"/> with the content chunks held in <c>items</c> and writes the
        /// results under <paramref name="folder"/>: one DGML file per chunk subgraph, the complete graph
        /// (<c>CompleteGraph.dgml</c>) and one subfolder per chunk filled by the chunk's own <c>Publish</c>.
        /// </summary>
        /// <param name="folder">Folder node used to resolve output file paths and to create the per-chunk subfolders.</param>
        /// <param name="InitialGraph">
        /// Graph to annotate and save. When <c>null</c>, the method returns right after preparing the color gradient:
        /// no file is written and no chunk is published.
        /// </param>
        public void Publish(folderNode folder, DirectedGraphWithSourceData InitialGraph)
        {
            // One gradient color per distinct extractor name found in the chunk collection.
            var extractorGradient = new ColorGradientForInstanceEnumeration<string>("#f7941d", "#6dcff6");
            extractorGradient.Prepare(items.Select(item => item.ExtractorName));

            if (InitialGraph == null)
            {
                return;
            }

            var overwriteMode = imbSCI.Data.enums.getWritableFileMode.overwrite;

            // Step 1: give every graph node that corresponds to a chunk-root path a neutral grey, dashed look.
            NodeGraph rootsGraph = BuildChunkRootsGraph();
            var rootPaths = rootsGraph.getAllChildren().Select(child => child.path);
            var rootNodesById = InitialGraph.Nodes.Get(rootPaths).ToDictionary(graphNode => graphNode.Id);

            foreach (var styledNode in rootNodesById.Values)
            {
                if (!(styledNode is Node))
                {
                    continue;
                }

                styledNode.Background      = "#999999";
                styledNode.StrokeDashArray = "2,5,2,5,2,5";
                styledNode.StrokeThinkness = 2;
            }

            // Step 2: for every root that carries metadata, annotate the graph and collect the resulting subgraphs.
            List<NodeGraph> annotatedRoots = rootsGraph.getAllChildrenInType<NodeGraph>()
                                                       .Where(candidate => candidate.HasMetaData())
                                                       .ToList();

            var subgraphsByChunk = new ListDictionary<ContentChunk, DirectedGraph>();
            var styleSettings    = new NodeDictionaryGraphStyleSettings();

            foreach (NodeGraph targetRoot in annotatedRoots)
            {
                var matches = InitialGraph.Select<NodeGraph>(new List<NodeGraph> { targetRoot }, source => source.path, true, true);

                ContentChunk targetChunk = targetRoot.GetMetaData<ContentChunk>();
                var          chunkColor  = extractorGradient.GetColor(targetChunk.ExtractorName, true);

                foreach (var match in matches)
                {
                    // Only node entries yield a subgraph; any other entry (e.g. a link) is left untouched.
                    if (!(match.Key is Node))
                    {
                        continue;
                    }

                    var linkedSets = targetChunk.PublishAnnotation(InitialGraph, chunkColor, styleSettings);

                    var subgraph = InitialGraph.GetSubgraph(linkedSets.SelectMany(linked => linked));
                    subgraph.Title = targetChunk.name;

                    subgraphsByChunk[targetChunk].Add(subgraph);
                }
            }

            // Step 3: save each collected subgraph, numbered per chunk, then the fully annotated graph.
            foreach (var entry in subgraphsByChunk)
            {
                int sequence = 0;
                foreach (var chunkSubgraph in entry.Value)
                {
                    string fileName = $"chunk_subgraph{entry.Key.name}{sequence}.dgml";

                    chunkSubgraph.Save(folder.pathFor(fileName, overwriteMode), overwriteMode);
                    sequence++;
                }
            }

            InitialGraph.Save(folder.pathFor("CompleteGraph.dgml"), overwriteMode);

            // Step 4: let every chunk publish its own diagnostics into a dedicated subfolder.
            foreach (ContentChunk publishedChunk in items)
            {
                var chunkFolder = folder.Add(publishedChunk.name.getCleanPropertyName(), publishedChunk.name, "Cluster group diagnostics");

                publishedChunk.Publish(chunkFolder);
            }
        }