/// <summary>
/// Call this method to execute all test methods declared in the child class.
/// </summary>
/// <param name="resultsNode">The results node.</param>
public void ExecuteTest(folderNode resultsNode = null)
{
    imbSCI.Core.screenOutputControl.logToConsoleControl.setAsOutput(log, GetType().Name);

    if (resultsNode != null)
    {
        folderResults = resultsNode;
    }

    log.log("-- Starting test [" + GetType().Name + "]");

    foreach (MethodInfo mi in GetType().GetMethods(System.Reflection.BindingFlags.Public | System.Reflection.BindingFlags.DeclaredOnly | System.Reflection.BindingFlags.Instance))
    {
        folderNode baseResults = folderResults;
        folderResults = baseResults.Add(mi.Name, mi.Name.imbTitleCamelOperation(true), "Results of test method [" + mi.Name + "]");

        if (!mi.GetParameters().Any())
        {
            log.log("-- Starting test method [" + mi.Name + "]");
            try
            {
                mi.Invoke(this, null);
            }
            catch (Exception ex)
            {
                log.log("-- : " + ex.LogException("Test failed [" + mi.Name + "]", GetType().Name + " -"));
            }
        }

        folderResults = baseResults;
    }

    Done();
}
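// The discovery rule above is plain reflection: public, parameterless, instance
// methods declared directly on the subclass. A minimal, self-contained sketch of
// that filter follows; the demo type and method names are illustrative, not part
// of the library above.
using System;
using System.Linq;
using System.Reflection;

public class ReflectionRunnerDemo
{
    public void Alpha() => Console.WriteLine("Alpha ran");

    // Has a parameter, so the filter below skips it - the same rule ExecuteTest applies.
    public void Beta(int x) => Console.WriteLine("Beta ran");

    public static void Main()
    {
        var demo = new ReflectionRunnerDemo();

        // Public + DeclaredOnly + Instance: the same BindingFlags combination used above.
        foreach (MethodInfo mi in demo.GetType().GetMethods(
            BindingFlags.Public | BindingFlags.DeclaredOnly | BindingFlags.Instance))
        {
            if (mi.GetParameters().Any()) continue; // only parameterless methods are invoked

            mi.Invoke(demo, null); // prints "Alpha ran"
        }
    }
}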
private void reportTarget(spiderTarget t, folderNode fn, int c)
{
    string pageFolder = "P" + c.ToString("D3") + "_" + t.IsRelevant.ToString();
    folderNode pfn = fn.Add(pageFolder, "Page " + c.ToString(), "Report on page " + t.url + " crawled by " + name + ". Target.IsRelevant: " + t.IsRelevant + ".".addLine(pageDescription));

    fileunit content = new fileunit(pfn.pathFor("content.txt"), false);
    fileunit links = new fileunit(pfn.pathFor("links.txt"), false);

    if (t.evaluation != null)
    {
        t.evaluation.saveObjectToXML(pfn.pathFor("relevance.xml"));
    }

    content.setContent(t.pageText);

    if (t.page != null)
    {
        foreach (spiderLink ln in t.page.relationship.outflowLinks.items.Values)
        {
            links.Append(ln.url);
        }

        //t.page.webpage.links.ForEach(x => links.Append(x.nature + " | " + x.name + " | " + x.url));
    }

    content.Save();
    links.Save();
    // marks.Save();
}
//public Dictionary<HtmlNode, DocumentCluster> GetClusterByDocumentDictionary()
//{
//    Dictionary<HtmlNode, DocumentCluster> output = new Dictionary<HtmlNode, DocumentCluster>();
//    foreach (DocumentCluster cluster in this.)
//    {
//        var nodes = cluster.items.Select(x => x);
//        foreach (HtmlNode node in nodes)
//        {
//            context.DeclarationConstruction_ClusterAnalysisContext.ClusterByDocuments.Add(node, cluster);
//        }
//        if (cluster.ClusterSeed != null)
//        {
//            context.DeclarationConstruction_ClusterAnalysisContext.ClusterByDocuments.Add(cluster.ClusterSeed, cluster);
//        }
//    }
//}

public void Publish(Dictionary<HtmlNode, HtmlSourceAndUrl> documentNodeDictionary, folderNode folderWithResults, DocumentSimilarityResult result)
{
    folderWithResults.generateReadmeFiles(null);

    var items = GetClusters<DocumentCluster>(true);

    Dictionary<HtmlNode, string> labelsByDocument = result.GetLabelsByDocument();

    if (!name.isNullOrEmpty())
    {
        folderWithResults = folderWithResults.Add(name, name, "Reports for cluster collection " + name);
    }

    builderForText reporter = new builderForText();

    foreach (DocumentCluster cluster in items)
    {
        cluster.Publish(labelsByDocument, documentNodeDictionary, folderWithResults, result);
        reporter.AppendPair(cluster.name, cluster.items.Count);
        reporter.AppendPair("- range", cluster.range.Range);
    }

    String reportPath = folderWithResults.pathFor("report.txt", imbSCI.Data.enums.getWritableFileMode.overwrite);
    String reportContent = reporter.GetContent();
    File.WriteAllText(reportPath, reportContent);
}
private void SetFolder(Int32 i = 0)
{
    // ArgumentException takes the message first, then the parameter name
    if (parent == null)
    {
        throw new ArgumentException("Parent state or Project must be set with construction of this state object!", nameof(parent));
    }

    folderNode parentFolder = parent.folder;

    if (parentFolder == null)
    {
        throw new ArgumentException("Parent state or Project must be set with construction of this state object!", nameof(Project));
    }

    if (_folder == null)
    {
        if (HasSubfolder)
        {
            _folder = parentFolder.Add(name, name, "Project state [" + Info.displayName + "] data." + Info.description);
        }
        else
        {
            _folder = parentFolder;
        }
    }

    if (_folder != null)
    {
        if (!_folder.path.StartsWith(parentFolder.path))
        {
            if (i > 0)
            {
                throw new Exception("Folder [" + _folder.path + "] is not child of [" + parentFolder.path + "] -- after [" + i.ToString() + "] iterations of autosetup");
            }

            SetFolder(i + 1);
        }
    }
}
public static void Save(this HtmlSourceAndUrlCollection sources, folderNode folder, String filename = "htmlsource", Boolean deleteExisting = true)
{
    if (deleteExisting)
    {
        folder.deleteFiles();
    }

    if (filename.isNullOrEmpty())
    {
        Int32 c = 0;
        foreach (var s in sources.items)
        {
            s.Save(folder, c.ToString());
            c++;
        }
    }
    else
    {
        foreach (var s in sources.items)
        {
            s.Save(folder, filename);
        }
    }

    // recurse into subcollections, each saved into its own subfolder
    foreach (var sb in sources.SubCollections)
    {
        var f = folder.Add(sb.name, sb.name, "HTML sources subcollection of " + sources.name + ".");
        sb.Save(f, filename);
    }
}
public void Init(folderNode _rootFolder, ITextRender logger)
{
    storageFolder = _rootFolder;

    Finance = new FileSystemRecordProvider<FinanceOverviewRecords, FinanceOverview>(storageFolder.Add(nameof(Finance), nameof(Finance), "Records storage"), "fin_", "fin_");

    InternationalTrade = new FileSystemRecordProvider<InternationalTradeRecords, InternationalTradeByCountry>(storageFolder.Add(nameof(InternationalTrade), nameof(InternationalTrade), "Records storage"), "int_", "int_");

    BankAccounts = new FileSystemRecordProvider<CompanyBankAccountCollection, bankAccount>(storageFolder.Add(nameof(BankAccounts), nameof(BankAccounts), "Records storage"), "ban_", "ban_");

    Persons = new FileSystemRecordProvider<CompanyPersonCollection, Person>(storageFolder.Add(nameof(Persons), nameof(Persons), "Records storage"), "per_", "per_");

    Companies = new FileSystemRecordProvider<CompanyInformationCollection, CompanyInformation>(storageFolder, "acc_", "acc_");
    Companies.OperationMode = RecordProviderOperationMode.singleCollectionMode;
}
protected void deployFolders(folderNode _folder, String _name)
{
    folder = _folder;
    name = _name;

    StructureModel = new reportStructureModel(name);
    node = StructureModel;

    folder_include = folder.Add("include", "include", "globally included resources");
}
/// <summary>
/// Creates the context.
/// </summary>
/// <param name="name">The name.</param>
/// <param name="folder">Root folder for experiment</param>
/// <returns></returns>
public PlanesMethodContext CreateContext(string name, folderNode folder)
{
    var generalContext = new PlanesMethodContext();
    generalContext.name = name;
    generalContext.folder = folder.Add(name, name, "Home folder of the experiment");
    return generalContext;
}
public void TestNGramsAndSimilarity()
{
    folderNode folder = new folderNode();
    folder = folder.Add("NLP\\WordAnalysis", "Word analysis", "Folder with results of word analysis tests");

    String[] words = new String[] {
        "ormar", "orman", "rashladni", "konstrukcija", "elektroinstalacija", "elektromotor", "motorno", "građevina",
        "građevinski", "metalni", "metalno", "metal", "aluminijum", "aluminijumski", "zgrada", "kotao",
        "kotlovski", "kotlarnica", "peć", "dimnjak", "cevovodi", "vod", "linija", "stanica",
        "elektrana", "elektrogradnja", "izgradnja", "gradjevinsko", "grejanje", "grejno", "gorivo", "goriva",
        "pelet", "panel", "polica", "stolica", "bakarni", "bronzani", "centrala", "obezbeđenje",
        "klimatizacija", "klimatizacioni", "ventilacija", "ventilacioni", "gorionik", "vatra", "voda", "cev",
        "proizvod", "proizvodni", "laser", "proizvodnja", "lasersko", "sečenje", "plazma", "merdevine",
        "čunak", "štednjak", "radijator", "elektro", "induktivno", "transformator", "transformatorska", "dalekovod",
        "elektrovod", "mašina", "šinski", "voz", "nadzemno", "visokogradnja", "podzemno", "transport",
        "prevoz", "izolacija", "plastika", "guma", "štender", "vitrina", "zamrzivač", "protivpožarna",
        "zaštita", "prodajna", "kontaktirajte", "kontakt", "kontakti", "telefon", "svetlo", "rasveta",
        "javna", "kompanija", "firma", "preduzeće", "društvo", "izvoz", "sto", "radni",
        "snaga", "napon", "krovni", "krov", "konstrukcioni", "konstruisanje", "tehničko", "tehnika",
        "zaposleni", "radnici", "reference", "kupci", "prodajni", "prodaja", "razvojni", "razvoj",
        "industrijski", "snabdevanje", "kućni", "nameštaj", "kancelarijski", "prostor", "podno", "pekara",
        "hleb", "pica", "peći", "pećnica", "žardinjera", "ograda", "čelična", "čelik",
        "galanterija", "stepenice", "nadvožnjak", "pešački", "saobraćajni", "znak", "tabla", "bilbord",
        "reklamni", "redni", "fluid", "hlađenje", "zagrevanje", "sagorevanje", "čvrsto", "pirolitički",
        "parni", "dim", "pepeo", "dopremanje", "čišćenje", "održavanje", "inoks", "inoksni",
        "inoksa", "razmenjivač", "toplote"
    };

    StringBuilder sb = new StringBuilder();
    foreach (String word in words)
    {
        sb.AppendLine(wordAnalysisTools.getNGramsDescriptiveLine(word, 2, nGramsModeEnum.overlap));
        sb.AppendLine(wordAnalysisTools.getNGramsDescriptiveLine(word, 2, nGramsModeEnum.ordinal));
    }

    String sbp = folder.pathFor("ngrams.txt", imbSCI.Data.enums.getWritableFileMode.autoRenameThis, "ngrams");
    File.WriteAllText(sbp, sb.ToString());

    wordSimilarityComponent component = new wordSimilarityComponent();
    component.N = 2;
    component.gramConstruction = nGramsModeEnum.overlap;
    component.treshold = 0.6;
    component.equation = nGramsSimilarityEquationEnum.DiceCoefficient;

    var result01 = component.GetResult(words);
    String p = folder.pathFor("result01.txt", imbSCI.Data.enums.getWritableFileMode.autoRenameThis, "TestNGrams", false);
    File.WriteAllText(p, result01.ToString());

    component.equation = nGramsSimilarityEquationEnum.JaccardIndex;
    var result02 = component.GetResult(words);
    p = folder.pathFor("result02.txt", imbSCI.Data.enums.getWritableFileMode.autoRenameThis, "TestNGrams", false);
    File.WriteAllText(p, result02.ToString());

    component.equation = nGramsSimilarityEquationEnum.continualOverlapRatio;
    var result03 = component.GetResult(words);
    p = folder.pathFor("result03.txt", imbSCI.Data.enums.getWritableFileMode.autoRenameThis, "TestNGrams", false);
    File.WriteAllText(p, result03.ToString());
}
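// For intuition on the equations this test compares: over the character-bigram
// sets A and B of two words, Dice = 2|A∩B| / (|A| + |B|) and Jaccard = |A∩B| / |A∪B|.
// A self-contained sketch, independent of wordSimilarityComponent (which may build
// its grams differently): with treshold = 0.6, the pair ("ormar", "orman") passes
// under Dice (0.75) and sits exactly at the boundary under Jaccard (0.60).
using System;
using System.Collections.Generic;
using System.Linq;

class BigramSimilarityDemo
{
    // All overlapping character bigrams of a word, as a set.
    static HashSet<string> Bigrams(string w) =>
        new HashSet<string>(Enumerable.Range(0, w.Length - 1).Select(i => w.Substring(i, 2)));

    static void Main()
    {
        var a = Bigrams("ormar"); // { or, rm, ma, ar }
        var b = Bigrams("orman"); // { or, rm, ma, an }

        int common = a.Intersect(b).Count();                  // 3

        double dice = 2.0 * common / (a.Count + b.Count);     // 2*3 / 8 = 0.75
        double jaccard = (double)common / a.Union(b).Count(); // 3 / 5   = 0.60

        Console.WriteLine($"Dice: {dice:F2}, Jaccard: {jaccard:F2}");
    }
}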
public classificationReportCollection AddOrGetChild(String subCollectionDirPath)
{
    subCollectionDirPath = subCollectionDirPath.Trim("\\".ToArray());

    if (!Children.ContainsKey(subCollectionDirPath))
    {
        Children.Add(subCollectionDirPath, new classificationReportCollection(subCollectionDirPath));
        var ch = Children[subCollectionDirPath];
        ch.datasetName = datasetName;
        ch.rootFolder = rootFolder.Add(subCollectionDirPath, ch.name, ch.description);
    }

    return Children[subCollectionDirPath];
}
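// The method above is the classic get-or-add idiom over a Dictionary. When thread
// safety matters, the BCL offers the same shape directly via ConcurrentDictionary;
// a generic illustration (not tied to classificationReportCollection):
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;

class GetOrAddDemo
{
    static void Main()
    {
        var children = new ConcurrentDictionary<string, List<string>>();

        // The factory runs only when the key is missing; later calls return the cached instance.
        List<string> reports = children.GetOrAdd("diagnostic", key => new List<string>());
        reports.Add("report.txt");

        Console.WriteLine(children["diagnostic"].Count); // 1
    }
}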
private String GetFilenameAndSetInstanceFolder(IFileDataStructure instance, folderNode parentFolder = null, ILogBuilder output = null)
{
    String filename = ""; // GetFilepath("", instance, false);

    try
    {
        switch (mode)
        {
            case fileStructureMode.subdirectory:
                //parentFolder = Directory.CreateDirectory(parentFolder.path);
                if (instance.folder == null)
                {
                    instance.folder = parentFolder.Add(instance.name, instance.name, "Directory for [" + instance.GetType().Name + "]. " + instance.description);
                }
                else
                {
                    if (instance.folder.name != instance.name)
                    {
                        instance.folder = parentFolder.Add(instance.name, instance.name, "Directory for [" + instance.GetType().Name + "]. " + instance.description);
                    }
                }

                filename = type.Name.getCleanPropertyName().add(formatMode.GetExtension(), ".");
                break;

            case fileStructureMode.none:
                fileDataStructureExtensions.FileDataStructureError("Can't have File Data Structure loaded if no file structure mode specified", parentFolder, output, null, instance);
                //throw new NotImplementedException("Can't have File Data Structure loaded if no file structure mode specified");
                break;
        }
    }
    catch (Exception ex)
    {
        fileDataStructureExtensions.FileDataStructureError("SaveDataStructure failed at designating folder and filename: " + ex.Message, parentFolder, output, ex, instance);
    }

    return filename;
}
/// <summary>
/// Initializes a new instance of the <see cref="experimentCompositeTemplate"/> class.
/// </summary>
/// <param name="parentFolder">The parent folder.</param>
public experimentCompositeTemplate(folderNode parentFolder)
{
    folder = parentFolder.Add("CompositeTemplates", "Composite Experiment Templates", "Directory with template objects for separate aspects of the experiment");

    folderForClassifierSets = folder.Add("Classifiers", "Classifier sets", "Directory with serialized templates on classifier sets");
    folderForFeatureVectorExtractors = folder.Add("FeatureVectorExtractor", "Feature Vector Extractors", "Directory with serialized templates of Feature Vector Extractors");
    folderForExperimentShells = folder.Add("Shells", "Experiment Shells", "Directory with shells - experiment settings outside classifiers and FVEs");

    checkDefaults();

    folder.generateReadmeFiles(appManager.AppInfo);
}
/// <summary>
/// Deploys the specified logger and workspace.
/// </summary>
/// <param name="loger">The logger.</param>
/// <param name="workspace">The workspace.</param>
public void deploy(ILogBuilder loger, folderNode workspace)
{
    if (!dataSourcePath.isNullOrEmpty())
    {
        //dataSource = csvFileExtensions.fromCsvFileToTable(dataSourcePath, dataSource, true);
        dataSource = imbDataTableExtensions.deserializeDataTable(dataSourcePath, imbSCI.Data.enums.reporting.dataTableExportEnum.excel, workspace);
    }

    if (!folderWithAttachmentsName.isNullOrEmpty())
    {
        folderWithAttachments = workspace.Add(folderWithAttachmentsName, "Attachments", "Folder with attachment files, to be sent via email");
    }
}
public void TestModificationLoad()
{
    folderNode folder = new folderNode();
    folder = folder.Add("diagnostic", "Diagnostic", "Directory used for testing");

    var logger = new builderForLog();

    String p = folder.pathFor("experimentTest2" + ".xml");
    var test2 = objectSerialization.loadObjectFromXML<experimentSetup>(p, logger);

    Assert.AreEqual("MOD", test2.description);
}
/// <summary>
/// Sets the cache folder (and, on reselect, resets the report folder) used by the experiment.
/// </summary>
public void SetReportAndCacheFolder(folderNode folder, Boolean reselect = false)
{
    if (reselect)
    {
        reportFolder = null;
        cacheFolder = null;
    }

    //folderNode ch_root = folder.parent as folderNode;

    if (cacheFolder == null)
    {
        cacheFolder = folder.Add("cache", "Cached content", "Folder with cached lexical resource partition, used during the experiment");
    }
}
/// <summary>
/// The constructor for the main (root) wrapper model node
/// </summary>
/// <param name="parentFolder">The folder.</param>
/// <param name="_name">The name.</param>
/// <param name="_description">The description.</param>
protected WrapperModelGraph(folderNode parentFolder, String _name, String _description)
{
    name = _name;

    if (_description.isNullOrEmpty())
    {
        description = "primary wrapper node - the root";
    }
    else
    {
        description = _description;
    }

    type = WrapperTypeEnum.rootModel;

    _folder = parentFolder.Add(name, "Wrapper model " + _name, "Directory with content harvested by WrapperModel [" + name + "]. " + _description);
}
public void DeployCase(kFoldValidationCase valCase, folderNode folderOverride = null)
{
    var fl = folder;
    if (folderOverride != null)
    {
        fl = folderOverride;
    }

    valCase.kFoldMaster = this;
    valCase.trainingCases.kFoldMaster = this;
    valCase.evaluationCases.kFoldMaster = this;
    valCase.evaluationCases.kFoldCase = valCase;
    valCase.trainingCases.kFoldCase = valCase;

    // use the resolved folder (fl) so that folderOverride actually takes effect
    valCase.folder = fl.Add(valCase.name, valCase.name, "Operational files and reports for k-fold [" + valCase.name + "]");
    valCase.caseFolder = valCase.folder.Add("cases", "Cases", "Repository with knowledge on cases");
    valCase.caseSampleFolder = valCase.caseFolder.Add("microAnalysis", "Micro-analysis of FV Extraction", "Randomly picked cases for Micro-analysis of FV Extraction - similarity computation between a case and a category");
}
/// <summary>
/// Saves the category to the specified directory path.
/// </summary>
/// <param name="pathToSave">The directory path to save into.</param>
/// <param name="options">The options.</param>
/// <param name="logger">The logger.</param>
public void Save(String pathToSave, WebDomainCategoryFormatOptions options = WebDomainCategoryFormatOptions.saveReadmeFile | WebDomainCategoryFormatOptions.saveAggregate | WebDomainCategoryFormatOptions.normalizeDomainname, ILogBuilder logger = null)
{
    DirectoryInfo di = new DirectoryInfo(pathToSave);
    folderNode folder = di;

    String domainList = GetDomainList(options, logger);

    String path = folder.pathFor(categorySiteList, imbSCI.Data.enums.getWritableFileMode.overwrite, "Web sites at this category", true);
    File.WriteAllText(path, domainList);

    StringBuilder sb = new StringBuilder();

    foreach (WebDomainCategory category in this)
    {
        var subFolder = folder.Add(category.name, category.name, "Subcategory");
        category.Save(subFolder.path, options, logger);

        if (options.HasFlag(WebDomainCategoryFormatOptions.saveAggregate))
        {
            List<string> sites = GetSites(100);
            sites.ForEach(x => sb.AppendLine(x));
            //sb.AppendLine(GetDomainList(options, logger));
        }
    }

    if (options.HasFlag(WebDomainCategoryFormatOptions.saveAggregate))
    {
        path = folder.pathFor(categoryAggregateSiteList, imbSCI.Data.enums.getWritableFileMode.overwrite, "Web sites at this category, including subcategories", true);
        File.WriteAllText(path, sb.ToString());
    }

    if (this.root == this)
    {
        if (options.HasFlag(WebDomainCategoryFormatOptions.saveGraphAtRoot))
        {
            var dgml = GraphConverters.DataSetDomainGraphConverter.Convert(this, 300); //imbSCI.Graph.Converters.GraphConversionTools.DefaultGraphToDGMLConverterInstance.Convert(this, 300); //.ConvertToDGML(this, 300);

            dgml.Save(folder.pathFor("dataset.dgml", imbSCI.Data.enums.getWritableFileMode.overwrite, "DirectedGraphMarkupLanguage representation of categories", true), imbSCI.Data.enums.getWritableFileMode.overwrite);
        }

        if (options.HasFlag(WebDomainCategoryFormatOptions.saveReadmeFile))
        {
            folder.generateReadmeFiles(imbACE.Core.appManager.AppInfo);
        }
    }
}
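// The options parameter is a standard [Flags] enum, which is why the defaults can be
// composed with | and queried with HasFlag. A generic illustration of the pattern;
// the enum here is made up, not the actual WebDomainCategoryFormatOptions definition.
using System;

[Flags]
enum SaveOptionsDemo
{
    None = 0,
    ReadmeFile = 1,
    Aggregate = 2,
    NormalizeDomainName = 4
}

class FlagsDemo
{
    static void Main()
    {
        // Compose a default the same way the Save signature above does.
        SaveOptionsDemo options = SaveOptionsDemo.ReadmeFile | SaveOptionsDemo.Aggregate;

        if (options.HasFlag(SaveOptionsDemo.Aggregate))
        {
            Console.WriteLine("aggregate site list will be written");
        }
    }
}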
public helpBuilderContext(helpBuilderConfiguration _configuration, Object _scope, string _filename = "")
{
    configuration = _configuration;
    scope = _scope;
    scopeType = scope.GetType();
    scopeEntry = new settingsMemberInfoEntry(scopeType);

    if (_filename.isNullOrEmpty())
    {
        _filename = "help";
    }

    filename = _filename;

    folder = new folderNode(configuration.outputPath, "Help", "Generated help content");
    folder = folder.Add(scopeType.Name, scopeType.Name, "Help output for [" + scopeType.Name + "]");

    resourcesFolder = new folderNode(configuration.resourcesPath, "Help resources", "Content resources for help generation");
}
public void Publish(Dictionary<HtmlNode, String> labelsByDocument, Dictionary<HtmlNode, HtmlSourceAndUrl> documentNodeDictionary, folderNode folderWithResults, DocumentSimilarityResult result)
{
    var cluster = this;

    folderNode cFolder = folderWithResults.Add(cluster.name, cluster.name, "Directory for cluster " + cluster.name);

    result.Publish(documentNodeDictionary, cFolder, cluster.items);

    builderForText reporter = new builderForText();
    reporter.AppendHeading("Name: " + cluster.name);
    reporter.AppendPair("Items", cluster.items.Count);

    if (cluster.ClusterSeed != null)
    {
        reporter.AppendPair("Seed", labelsByDocument[cluster.ClusterSeed]);
    }

    foreach (var pair in cluster.range.GetDictionary())
    {
        reporter.AppendPair(pair.Key, pair.Value.ToString("F3"));
    }

    foreach (var item in cluster.items)
    {
        if (item != cluster.ClusterSeed)
        {
            if (cluster.scoreDictionary.ContainsKey(item))
            {
                String label = labelsByDocument[item];
                Double score = cluster.scoreDictionary[item];
                HtmlSourceAndUrl source = documentNodeDictionary[item];

                reporter.AppendLine("-----------------------------------");
                reporter.AppendLine(label + " => " + score.ToString("F3"));
                reporter.AppendLine("Filepath: " + source.filepath);
                reporter.AppendLine("Url: " + source.url);
            }
        }
    }

    String reportPath = cFolder.pathFor("report.txt", imbSCI.Data.enums.getWritableFileMode.overwrite);
    String reportContent = reporter.GetContent();
    File.WriteAllText(reportPath, reportContent);
}
public void TestExperimentSetupLoadSave()
{
    folderNode folder = new folderNode();
    folder = folder.Add("diagnostic", "Diagnostic", "Directory used for testing");

    var logger = new builderForLog();

    var test = experimentSetup.GetDefaultExperimentSetup();
    test.name = "experimentTest";
    test.description = "testing experiment load and save";

    String p = folder.pathFor(test.name + ".xml");
    objectSerialization.saveObjectToXML(test, p);

    var test2 = objectSerialization.loadObjectFromXML<experimentSetup>(p, logger);

    Assert.AreEqual(test.name, test2.name);
    Assert.AreEqual(test.description, test2.description);
    Assert.AreEqual(test.featureVectorExtractors_semantic.Count, test2.featureVectorExtractors_semantic.Count);
}
/// <summary>
/// Saves the web sites.
/// </summary>
/// <param name="category">The category.</param>
/// <param name="rootFolder">The root folder.</param>
protected void SaveWebSites(WebDocumentsCategory category, folderNode rootFolder, WebDomainCategoryFormatOptions options, ILogBuilder logger = null)
{
    folderNode folder = rootFolder.Add(category.name, category.name, category.description);

    StringBuilder domainList = new StringBuilder();

    foreach (WebSiteDocuments site in category.siteDocuments)
    {
        domainList.AppendLine(site.domain);
        SaveWebSite(site, folder);

        /*
        foreach (WebSiteDocument page in site.documents)
        {
            String filename = site.domain.add(page.path, "/");
            filename = filename.Replace("//", "/");
            filename = "http://" + filename;
            filename = GetFilenameFromURLPath(filename);
            filename = WebSiteDocumentsSetTools.GetSafeFilename(filename);

            String p = folder.pathFor(filename, imbSCI.Data.enums.getWritableFileMode.existing, "Page of [" + site.domain + "] at path [" + page.path + "]", false);

            String source = GetWebDocumentSource(page);
            if (!File.Exists(p))
            {
                File.WriteAllText(p, source);
            }
        }
        */
    }

    if (options.HasFlag(WebDomainCategoryFormatOptions.saveDomainList))
    {
        File.WriteAllText(folder.pathFor(WebDomainCategory.categorySiteList, imbSCI.Data.enums.getWritableFileMode.overwrite, "Domains in category [" + category.path + "]", true), domainList.ToString());
    }

    SaveSubcategories(category, folder, options);
}
/// <summary>
/// Initializes a new instance of the <see cref="HarvesterExecutionContext"/> class.
/// </summary>
/// <param name="_project">The project.</param>
/// <param name="_unit">The unit.</param>
/// <param name="_parentFolder">The parent folder.</param>
public HarvesterExecutionContext(HarvesterProject _project, HarvesterUnit _unit, folderNode _parentFolder)
{
    project = _project;
    unit = _unit;
    folder = _parentFolder.Add(_project.name, _project.name + " output", _project.description);
}
public void TestCloudWeaver()
{
    folderNode folder = new folderNode();
    folderNode weaverFolder = folder.Add("NLP\\CloudWeaver", "Cloud Weaver", "Folder with results of cloud weaver tests");
    folderNode cloudFolder = folder.Add("Clouds", "Test resources", "");
    folderNode resourceFolder = folder.Add("resources", "Test resources", "");

    lemmaSemanticWeaver weaver = new lemmaSemanticWeaver();
    weaver.prepare(resourceFolder, null);
    weaver.useSimilarity = true;
    weaver.similarWords.N = 2;
    weaver.similarWords.gramConstruction = nGramsModeEnum.overlap;
    weaver.similarWords.treshold = 0.6;
    weaver.similarWords.equation = nGramsSimilarityEquationEnum.DiceCoefficient;
    weaver.useDictionary = false;

    var cloudPaths = cloudFolder.findFiles("*_initialCloud.xml", SearchOption.TopDirectoryOnly);

    foreach (String path in cloudPaths)
    {
        lemmaSemanticCloud testCloud = lemmaSemanticCloud.Load<lemmaSemanticCloud>(path);
        testCloud.GetSimpleGraph(false).Save(weaverFolder.pathFor(testCloud.className + "_initial.dgml", imbSCI.Data.enums.getWritableFileMode.overwrite), imbSCI.Data.enums.getWritableFileMode.overwrite);

        var report = weaver.Process(testCloud, null);
        report.Save(weaverFolder, "DiceCoefficient");
    }

    weaver.similarWords.equation = nGramsSimilarityEquationEnum.JaccardIndex;

    foreach (String path in cloudPaths)
    {
        lemmaSemanticCloud testCloud = lemmaSemanticCloud.Load<lemmaSemanticCloud>(path);
        var report = weaver.Process(testCloud, null);
        report.Save(weaverFolder, "JaccardIndex");
    }

    weaver.similarWords.equation = nGramsSimilarityEquationEnum.continualOverlapRatio;

    foreach (String path in cloudPaths)
    {
        lemmaSemanticCloud testCloud = lemmaSemanticCloud.Load<lemmaSemanticCloud>(path);
        var report = weaver.Process(testCloud, null);
        report.Save(weaverFolder, "ContinualOverlap");

        objectSerialization.saveObjectToXML(testCloud, weaverFolder.pathFor(testCloud.className + "_weaved.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Processed cloud"));
        testCloud.GetSimpleGraph(false).Save(weaverFolder.pathFor(testCloud.className + "_weaved.dgml", imbSCI.Data.enums.getWritableFileMode.overwrite), imbSCI.Data.enums.getWritableFileMode.overwrite);
    }

    //weaver.similarWords.equation = nGramsSimilarityEquationEnum.continualOverlapRatio;
    //weaver.useDictionary = true;

    //foreach (String path in cloudPaths)
    //{
    //    lemmaSemanticCloud testCloud = lemmaSemanticCloud.Load<lemmaSemanticCloud>(path);
    //    var report = weaver.Process(testCloud, null);
    //    report.Save(weaverFolder, "JaccardIndexAndApertium");
    //}

    folder.generateReadmeFiles(new imbSCI.Core.data.aceAuthorNotation());
}
/// <summary>
/// Creates a report table version of the <c>source</c> and saves the report to the specified <c>folder</c>
/// </summary>
/// <param name="source">The source.</param>
/// <param name="folder">The folder.</param>
/// <param name="notation">The notation.</param>
/// <param name="filenamePrefix">The filename prefix.</param>
/// <param name="disablePrimaryKey">if set to <c>true</c> [disable primary key].</param>
/// <param name="allowAsyncCall">if set to <c>true</c> [allow asynchronous call].</param>
/// <returns></returns>
/// <exception cref="ArgumentNullException">Thrown when <c>folder</c> is null.</exception>
public static DataTableForStatistics GetReportAndSave(this DataTable source, folderNode folder, aceAuthorNotation notation = null, string filenamePrefix = "", bool disablePrimaryKey = true, Boolean allowAsyncCall = false)
{
    if (notation == null)
    {
        notation = new aceAuthorNotation();
    }

    if (allowAsyncCall)
    {
        if (imbSCI.Core.config.imbSCICoreConfig.settings.DataTableReports_AsyncExportCalls)
        {
            DataTableForStatisticsExportJob job = new DataTableForStatisticsExportJob(source, folder, notation, filenamePrefix, disablePrimaryKey);
            Thread t = new Thread(job.Do);
            t.Start();
            return null; // Task.Factory -- export continues on the background thread
        }
    }

    // if (source == null) return new DataTableForStatistics();

    if (folder == null)
    {
        throw new ArgumentNullException("Folder is null! at GetReportAndSave() for [" + source.TableName + "] at filename [" + filenamePrefix + "]");
    }

    if (source.Columns.Count > 0)
    {
        folderNode dataFolder = null;

        if (DataTableForStatistics.AUTOSAVE_CleanDataTable || DataTableForStatistics.AUTOSAVE_FieldsText || imbSCI.Core.config.imbSCICoreConfig.settings.DataTableReports_DoExportXMLData)
        {
            dataFolder = folder.Add(EXTRAFOLDER, "Excel report meta data", "Folder containing clean data export (single header row, CSV format) for easier use by other software platforms and/or column meta descriptions - additional information - in separate txt file for each Excel report created.");
        }

        if (imbSCI.Core.config.imbSCICoreConfig.settings.DataTableReports_DoExportXMLData)
        {
            try
            {
                String xmlCode = objectSerialization.ObjectToXML(source);
                xmlCode.saveStringToFile(dataFolder.pathFor(source.TableName.getFilename(".xml"), getWritableFileMode.overwrite, "XML Serialized DataTable [" + source.GetTitle() + "]", true));
            }
            catch (Exception ex)
            {
                source.SetAdditionalInfoEntry("XML data", "Serialization failed: " + ex.Message);
            }
        }

        if (DataTableForStatistics.AUTOSAVE_CleanDataTable)
        {
            string cld = source.serializeDataTable(dataTableExportEnum.csv, PREFIX_CLEANDATATABLE + filenamePrefix.getFilename() + ".csv", dataFolder, notation);
            source.SetAdditionalInfoEntry("Clean data", cld);
        }

        if (DataTableForStatistics.AUTOSAVE_FieldsText)
        {
            string cli = dataFolder.pathFor(PREFIX_COLUMNINFO + filenamePrefix.getFilename() + ".txt");
            source.GetUserManualForTableSaved(cli);
            source.SetAdditionalInfoEntry("Column info", cli);
        }

        if (tableReportCreation_insertFilePathToTableExtra)
        {
            // intentionally empty in the source
        }
    }

    DataTableForStatistics output = null;

    if (source is DataTableForStatistics)
    {
        output = source as DataTableForStatistics;
    }
    else
    {
        output = source.GetReportTableVersion(disablePrimaryKey);
        // output.SetDefaults();
        //source.serializeDataTable(enums.dataTableExportEnum.excel, filenamePrefix + "_source", folder, notation);
    }

    output.Save(folder, notation, filenamePrefix);

    return output;
}
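// Note the design choice in the async branch above: the caller immediately gets null
// back and the export finishes on a dedicated thread, so callers that need the
// DataTableForStatistics instance must pass allowAsyncCall = false. A stripped-down
// sketch of that fire-and-forget job pattern; the job class and payload are
// illustrative, not the imbSCI types.
using System;
using System.Threading;

class ExportJobDemo // illustrative stand-in for DataTableForStatisticsExportJob
{
    private readonly string _payload;

    public ExportJobDemo(string payload) { _payload = payload; }

    // Matches the ThreadStart delegate shape: no parameters, no return value.
    public void Do()
    {
        Thread.Sleep(100); // stands in for the slow Excel export
        Console.WriteLine("Exported: " + _payload);
    }
}

class Program
{
    static void Main()
    {
        var job = new ExportJobDemo("report.xlsx");
        new Thread(job.Do).Start(); // returns immediately; no handle to the result

        Console.WriteLine("Caller moved on before the export finished.");
    }
}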
public void FullReport(folderNode folder, String datasetA_name = "A", String datasetB_name = "B", String runName = "DataSets")
{
    folder = folder.Add(runName, runName, "Reports on datasets [" + datasetA_name + "," + datasetB_name + "]");

    #region --------------- TERM LISTS ------------------------------

    String tknA_p = folder.pathFor("tokens_" + datasetA_name + ".txt", imbSCI.Data.enums.getWritableFileMode.overwrite, "List of all unique terms in the dataset [" + datasetA_name + "]");
    String tknB_p = folder.pathFor("tokens_" + datasetB_name + ".txt", imbSCI.Data.enums.getWritableFileMode.overwrite, "List of all unique terms in the dataset [" + datasetB_name + "]");
    String tknC_p = folder.pathFor("tokens_common.txt", imbSCI.Data.enums.getWritableFileMode.overwrite, "List of all unique terms, existing in both datasets");
    String tknA_u_p = folder.pathFor("tokens_" + datasetA_name + "_specific.txt", imbSCI.Data.enums.getWritableFileMode.overwrite, "List of all unique terms, being specific to the dataset [" + datasetA_name + "], and not part of the other dataset");
    String tknB_u_p = folder.pathFor("tokens_" + datasetB_name + "_specific.txt", imbSCI.Data.enums.getWritableFileMode.overwrite, "List of all unique terms, being specific to the dataset [" + datasetB_name + "], and not part of the other dataset");

    File.WriteAllText(tknA_p, tknA.toCsvInLine(","));
    File.WriteAllText(tknB_p, tknB.toCsvInLine(","));
    File.WriteAllText(tknC_p, tknC.toCsvInLine(","));
    File.WriteAllText(tknA_u_p, tknA_u.toCsvInLine(","));
    File.WriteAllText(tknB_u_p, tknB_u.toCsvInLine(","));

    #endregion

    #region ------------------ domain list

    String docSetUA_p = folder.pathFor("domains_" + datasetA_name + "_specific.txt", imbSCI.Data.enums.getWritableFileMode.overwrite, "List of all domains, being unique for the dataset [" + datasetA_name + "]");
    String docSetUB_p = folder.pathFor("domains_" + datasetB_name + "_specific.txt", imbSCI.Data.enums.getWritableFileMode.overwrite, "List of all domains, being unique for the dataset [" + datasetB_name + "]");
    String docSetUC_p = folder.pathFor("domains_common.txt", imbSCI.Data.enums.getWritableFileMode.overwrite, "List of all domains, being common to both datasets");
    String docSetA_p = folder.pathFor("domains_" + datasetA_name + ".txt", imbSCI.Data.enums.getWritableFileMode.overwrite, "List of all domains in the dataset");
    String docSetB_p = folder.pathFor("domains_" + datasetB_name + ".txt", imbSCI.Data.enums.getWritableFileMode.overwrite, "List of all domains in the dataset");

    File.WriteAllLines(docSetUA_p, DocumentSetsUniqueForA);
    File.WriteAllLines(docSetUB_p, DocumentSetsUniqueForB);
    File.WriteAllLines(docSetUC_p, DocumentSetsInCommonByName);
    File.WriteAllLines(docSetA_p, analyticA.domains);
    File.WriteAllLines(docSetB_p, analyticB.domains);

    #endregion

    analyticA.GetDataTable(datasetA_name + "_contentAnalysis").GetReportAndSave(folder, new imbSCI.Core.data.aceAuthorNotation());
    analyticB.GetDataTable(datasetB_name + "_contentAnalysis").GetReportAndSave(folder, new imbSCI.Core.data.aceAuthorNotation());

    // HtmlTagCategoryTree dataSetASummary = new HtmlTagCategoryTree(datasetA_name, "HTML Tags statistics");

    foreach (var pair in analyticA.categoryNameVsHtmlTag)
    {
        pair.Value.GetDataTable(datasetA_name + "_" + pair.Key + "_htmlTag", pair.Value.description).GetReportAndSave(folder, new imbSCI.Core.data.aceAuthorNotation());
        // dataSetASummary.Merge(pair.Value);
    }

    analyticA.GlobalCategoryTree.GetDataTable(datasetA_name + "_htmlTag_all", "Aggregate HTML tags statistics").GetReportAndSave(folder, new imbSCI.Core.data.aceAuthorNotation());

    // HtmlTagCategoryTree dataSetBSummary = new HtmlTagCategoryTree(datasetB_name, "HTML Tags statistics");

    foreach (var pair in analyticB.categoryNameVsHtmlTag)
    {
        pair.Value.GetDataTable(datasetB_name + "_" + pair.Key + "_htmlTag", pair.Value.description).GetReportAndSave(folder, new imbSCI.Core.data.aceAuthorNotation());
        // dataSetBSummary.Merge(pair.Value);
    }

    // dataSetBSummary.GetDataTable(datasetB_name + "_htmlTag_all", "Aggregate HTML tags statistics").GetReportAndSave(folder, new imbSCI.Core.data.aceAuthorNotation());

    analyticB.GlobalCategoryTree.GetDataTable(datasetB_name + "_htmlTag_all", "Aggregate HTML tags statistics").GetReportAndSave(folder, new imbSCI.Core.data.aceAuthorNotation());

    GetPCE().getDataTable().Save(folder, new imbSCI.Core.data.aceAuthorNotation(), "ComparisonMetrics");

    folder.generateReadmeFiles(new imbSCI.Core.data.aceAuthorNotation());
}
/// <summary>
/// Prepares for parallel execution.
/// </summary>
/// <param name="tools">The tools.</param>
/// <param name="_context">The context.</param>
public webProjectKnowledgeSet PrepareForParallelExecution(classifierTools tools, experimentExecutionContext _context)
{
    if (caseKnowledgeSet == null)
    {
        caseKnowledgeSet = new webProjectKnowledgeSet();
    }

    if (items.Any())
    {
        experimentContext.notes.log("Mining Context was ready already.");
        return caseKnowledgeSet;
    }

    DateTime startTime = DateTime.Now;

    experimentContext = _context;

    List<webCaseKnowledge> cases = new List<webCaseKnowledge>();

    folderNode classReportFolder = experimentContext.folder.Add("General", "General and diagnostic reports", "The folder contains general (outside k-folds) reports on analysed industries (categories), web sites and other diagnostic data");

    // <---------------------------------------------------------------------------------------------------------------- [ performing pipeline ]
    experimentContext.notes.log("Executing the Mining Context decomposition with the pipeline model");

    foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
    {
        var pipelineContext = GetContextForPipeline(tools, classSet);
        sitesByCategory.Add(classSet, new List<pipelineTaskMCSiteSubject>());

        if (!pipelineContext.exitByType.ContainsKey(typeof(pipelineTaskMCSiteSubject)))
        {
            throw new aceGeneralException("Pipeline context output contains no web site subjects! Check the pipeline Site Task constructor.", null, pipelineContext, "Pipeline broken");
        }

        var sitesForContext = pipelineContext.exitByType[typeof(pipelineTaskMCSiteSubject)]; // <----- preparing

        foreach (var site in sitesForContext)
        {
            tokenBySite.Add(site as pipelineTaskMCSiteSubject, new ConcurrentBag<pipelineTaskSubjectContentToken>());
            sitesByCategory[classSet].Add(site as pipelineTaskMCSiteSubject);

            webCaseKnowledge webCase = new webCaseKnowledge(site as pipelineTaskMCSiteSubject, classSet);

            caseKnowledgeSet.Add(webCase);
            cases.Add(webCase);
        }

        semanticFVExtractorKnowledge kn = new semanticFVExtractorKnowledge();
        kn.name = classSet.name + "_general";
        kn.relatedItemPureName = classSet.name;
        kn.type = WebFVExtractorKnowledgeType.aboutCompleteCategory;
        kn.Deploy(classReportFolder, experimentContext.logger);
        knowledgeByClass.TryAdd(classSet, kn);
    }

    experimentContext.notes.log("Sorting tokens for all sites [in parallel]");

    Parallel.ForEach(tokenBySite.Keys, site =>
    {
        var leafs = site.getAllLeafs();
        foreach (var leaf in leafs)
        {
            pipelineTaskSubjectContentToken token = leaf as pipelineTaskSubjectContentToken;
            if (token != null)
            {
                tokenBySite[site].Add(token);
            }
        }
    });

    foreach (var c in cases)
    {
        c.tokens = tokenBySite[c.MCSiteSubject];
    }

    experimentContext.notes.log("Building diagnostic TF-IDF master tables for all classes [in parallel]");

    Boolean useIntegratedApproach = false;

    if (useIntegratedApproach)
    {
        var valCase = experimentContext.validationCollections[experimentContext.masterExtractor.name].GetDiagnosticCase(experimentContext.classes);

        Parallel.ForEach(sitesByCategory, pair =>
        {
            knowledgeByClass.TryAdd(pair.Key, experimentContext.masterExtractor.DoFVExtractionForClassViaCases(valCase.trainingCases[pair.Key.classID], pair.Key, valCase, experimentContext.tools, experimentContext.logger));
        });
    }
    else
    {
        Parallel.ForEach(sitesByCategory, pair =>
        {
            IDocumentSetClass category = pair.Key;
            List<pipelineTaskMCSiteSubject> sites = pair.Value;

            var lt = BuildLemmaTableForClass(tools, category, sites);
            lt.Save();
            // lt.SaveAs(classReportFolder.pathFor(lt.info.Name), imbSCI.Data.enums.getWritableFileMode.overwrite);
        });
    }

    experimentContext.notes.log("Saving lexical resource cache subset - for later reuse in case of repeated experiment run");
    tools.SaveCache();

    if (!useIntegratedApproach)
    {
        experimentContext.notes.log("Performing chunk construction for all web sites in all categories [in serial]");

        foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
        {
            BuildChunksForClass(tools, classSet);
        }

        foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
        {
            experimentContext.masterExtractor.chunkTableConstructor.process(chunksByCategory[classSet], cnt_level.mcPage, knowledgeByClass[classSet].WLChunkTableOfIndustryClass, null, experimentContext.logger, false);
        }
    }

    if (tools.operation.doCreateDiagnosticMatrixAtStart)
    {
        experimentContext.notes.log("Performing diagnostic analysis on all categories...[doCreateDiagnosticMatrixAtStart=true]");

        folderNode matrixReport = classReportFolder.Add("clouds", "More reports on semantic cloud", "Directory contains exported DirectedGraphs, various matrix derivatives, combined cloud and other diagnostic things");

        List<lemmaSemanticCloud> clouds = new List<lemmaSemanticCloud>();
        List<lemmaSemanticCloud> filteredClouds = new List<lemmaSemanticCloud>();

        var converter = lemmaSemanticCloud.GetDGMLConverter();

        foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
        {
            // experimentContext.masterExtractor.chunkTableConstructor.process(chunksByCategory[classSet], cnt_level.mcPage, knowledgeByClass[classSet].WLChunkTableOfIndustryClass, null, experimentContext.logger, false);

            var cloud = experimentContext.masterExtractor.CloudConstructor.process(knowledgeByClass[classSet].WLChunkTableOfIndustryClass, knowledgeByClass[classSet].WLTableOfIndustryClass, knowledgeByClass[classSet].semanticCloud, experimentContext.logger, tokenBySite.Keys.ToList(), tools.GetLemmaResource());

            knowledgeByClass[classSet].semanticCloud.className = classSet.name;
            clouds.Add(cloud);

            if (experimentContext.tools.operation.doUseSimpleGraphs)
            {
                cloud.GetSimpleGraph(true).Save(matrixReport.pathFor("cloud_initial_" + classSet.name, imbSCI.Data.enums.getWritableFileMode.none, "Initial version of full-sample set, diagnostic Semantic Cloud for category [" + classSet.name + "]"));
            }
            else
            {
                converter.ConvertToDMGL(cloud).Save(matrixReport.pathFor("cloud_initial_" + classSet.name, imbSCI.Data.enums.getWritableFileMode.none, "Initial version of full-sample set, diagnostic Semantic Cloud for category [" + classSet.name + "]"));
            }

            knowledgeByClass[classSet].semanticCloudFiltered = knowledgeByClass[classSet].semanticCloud.CloneIntoType<lemmaSemanticCloud>(true);
            knowledgeByClass[classSet].semanticCloudFiltered.className = classSet.name;
            filteredClouds.Add(knowledgeByClass[classSet].semanticCloudFiltered);
        }

        cloudMatrix matrix = new cloudMatrix("CloudMatrix", "Diagnostic cloud matrix created from the complete sample set of [" + clouds.Count() + "] classes");
        matrix.build(filteredClouds, experimentContext.logger);

        lemmaSemanticCloud mergedCloudInitial = matrix.GetUnifiedCloud();
        mergedCloudInitial.Save(matrixReport.pathFor("unified_initial_cloud.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized object - Initial version of Semantic Cloud built as union of full-sample set Semantic Clouds of all categories"));

        var reductions = matrix.TransformClouds(experimentContext.masterExtractor.settings.semanticCloudFilter, experimentContext.logger);

        var p = matrixReport.pathFor("reductions_nodes.txt", imbSCI.Data.enums.getWritableFileMode.overwrite, "Report on Cloud Matrix transformation process");
        File.WriteAllLines(p, reductions);

        matrix.BuildTable(experimentContext.masterExtractor.settings.semanticCloudFilter, cloudMatrixDataTableType.initialState | cloudMatrixDataTableType.maxCloudFrequency | cloudMatrixDataTableType.absoluteValues).GetReportAndSave(matrixReport, appManager.AppInfo, "matrix_max_cf_initial", true, experimentContext.tools.operation.doReportsInParalell);

        matrix.BuildTable(experimentContext.masterExtractor.settings.semanticCloudFilter, cloudMatrixDataTableType.initialState | cloudMatrixDataTableType.overlapSize | cloudMatrixDataTableType.absoluteValues).GetReportAndSave(matrixReport, appManager.AppInfo, "matrix_overlap_size_initial", true, experimentContext.tools.operation.doReportsInParalell);

        matrix.BuildTable(experimentContext.masterExtractor.settings.semanticCloudFilter, cloudMatrixDataTableType.initialState | cloudMatrixDataTableType.overlapValue | cloudMatrixDataTableType.absoluteValues).GetReportAndSave(matrixReport, appManager.AppInfo, "matrix_overlap_value_initial", true, experimentContext.tools.operation.doReportsInParalell);

        matrix.ExportTextReports(matrixReport, true, "matrix_cf");
        matrix.ExportTextReports(matrixReport, false, "matrix_cf");

        lemmaSemanticCloud mergedCloudAfterReduction = matrix.GetUnifiedCloud();
        mergedCloudAfterReduction.Save(matrixReport.pathFor("unified_reduced_cloud.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized object - Version of all-categories diagnostic Semantic Cloud, after Cloud Matrix filter was applied"));

        if (experimentContext.tools.operation.doUseSimpleGraphs)
        {
            mergedCloudInitial.GetSimpleGraph(true).Save(matrixReport.pathFor("unified_initial_cloud", imbSCI.Data.enums.getWritableFileMode.overwrite, "DirectedGraphML file - unified Semantic Cloud, before Cloud Matrix filter was applied - Open this in Visual Studio"));
        }
        else
        {
            converter = lemmaSemanticCloud.GetDGMLConverter();
            converter.ConvertToDMGL(mergedCloudInitial).Save(matrixReport.pathFor("unified_initial_cloud", imbSCI.Data.enums.getWritableFileMode.overwrite, "DirectedGraphML file - unified Semantic Cloud, before Cloud Matrix filter was applied - Open this in Visual Studio"));
        }

        // <-------- analysis -----------------------------------------------------------------------------------

        DataTableTypeExtended<freeGraphReport> cloudReports = new DataTableTypeExtended<freeGraphReport>();

        foreach (var cl in filteredClouds)
        {
            freeGraphReport fgReport = new freeGraphReport(cl);
            fgReport.Save(matrixReport);
            cloudReports.AddRow(fgReport);
        }

        freeGraphReport unifiedReport = new freeGraphReport(mergedCloudAfterReduction);
        unifiedReport.Save(matrixReport);
        cloudReports.AddRow(unifiedReport);

        cloudReports.GetReportAndSave(matrixReport, appManager.AppInfo, "analysis_SemanticClouds");

        // <-------- analysis -----------------------------------------------------------------------------------

        foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
        {
            var cloud = knowledgeByClass[classSet].semanticCloudFiltered; // .WLChunkTableOfIndustryClass, knowledgeByClass[classSet].WLTableOfIndustryClass, knowledgeByClass[classSet].semanticCloud, experimentContext.logger, tokenBySite.Keys.ToList());

            if (experimentContext.tools.operation.doUseSimpleGraphs)
            {
                cloud.GetSimpleGraph(true).Save(matrixReport.pathFor("unified_initial_cloud", imbSCI.Data.enums.getWritableFileMode.overwrite, "DirectedGraphML file - unified Semantic Cloud, before Cloud Matrix filter was applied - Open this in Visual Studio"));
            }
            else
            {
                converter = lemmaSemanticCloud.GetDGMLConverter();
                converter.ConvertToDMGL(cloud).Save(matrixReport.pathFor("unified_initial_cloud", imbSCI.Data.enums.getWritableFileMode.overwrite, "DirectedGraphML file - unified Semantic Cloud, before Cloud Matrix filter was applied - Open this in Visual Studio"));
            }

            //converter.ConvertToDMGL(cloud).Save(matrixReport.pathFor("cloud_reduced_" + classSet.name, imbSCI.Data.enums.getWritableFileMode.none, "DirectedGraphML file - Initial version of Semantic Cloud built as union of full-sample set Semantic Clouds of all categories (Open this with VS)"), imbSCI.Data.enums.getWritableFileMode.overwrite);
        }

        instanceCountCollection<String> tfcounter = new instanceCountCollection<string>();

        foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
        {
            var wlt = knowledgeByClass[classSet].WLTableOfIndustryClass.GetDataTable();
            wlt.DefaultView.Sort = "termFrequency desc";
            var sorted = wlt.DefaultView.ToTable();

            var tbl = wlt.GetClonedShema<DataTable>(true);
            tbl.CopyRowsFrom(sorted, 0, 100);
            tbl.GetReportAndSave(classReportFolder, appManager.AppInfo, classSet.name + "_WebLemma", true, experimentContext.tools.operation.doReportsInParalell);

            var cht = knowledgeByClass[classSet].WLChunkTableOfIndustryClass.GetDataTable();
            cht.DefaultView.Sort = "termFrequency desc";
            var csorted = cht.DefaultView.ToTable();

            tbl = cht.GetClonedShema<DataTable>(true);
            tbl.CopyRowsFrom(csorted, 0, 100);
            tbl.GetReportAndSave(classReportFolder, appManager.AppInfo, classSet.name + "_Chunks", true, experimentContext.tools.operation.doReportsInParalell);

            tfcounter.AddInstanceRange(knowledgeByClass[classSet].WLTableOfIndustryClass.unresolved);

            knowledgeByClass[classSet].OnBeforeSave();
        }

        List<String> countSorted = tfcounter.getSorted();
        StringBuilder sb = new StringBuilder();
        foreach (String s in countSorted)
        {
            sb.AppendLine(String.Format("{1} : {0}", s, tfcounter[s]));
        }

        String pt = classReportFolder.pathFor("unresolved_tokens.txt", imbSCI.Data.enums.getWritableFileMode.none, "Cloud Frequency list of all unresolved letter-only tokens");
        File.WriteAllText(pt, sb.ToString());
    }

    if (tools.operation.doFullDiagnosticReport)
    {
        experimentContext.notes.log("Generating full diagnostic report on classes...");

        DataTable rep = null;
        foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
        {
            rep = this.GetClassKnowledgeReport(classSet, rep);
        }

        rep.SetAdditionalInfoEntry("Experiment", experimentContext.setup.name);
        rep.AddExtra("Experiment: " + experimentContext.setup.name);
        rep.AddExtra("Info: " + experimentContext.setup.description);
        rep.SetDescription("Structural report for all classes in the experiment");
        rep.GetReportAndSave(classReportFolder, appManager.AppInfo, "structural_class_report", true, experimentContext.tools.operation.doReportsInParalell);
    }

    classReportFolder.generateReadmeFiles(appManager.AppInfo);

    experimentContext.notes.log("Mining Context preprocessing done in [" + DateTime.Now.Subtract(startTime).TotalMinutes.ToString("F2") + "] minutes");

    return caseKnowledgeSet;
}
/// <summary>
/// Analyses the specified folder.
/// </summary>
/// <param name="folder">The folder.</param>
/// <param name="log">The log.</param>
public void Analysis(folderNode folder, ILogBuilder log)
{
    if (globalReport == null && settings.RequiredScopes.HasFlag(CWPAnalysisScopeEnum.globalLevel))
    {
        if (log != null)
        {
            log.log("Making global report");
        }

        FeatureCWPAnalysisEntryReport entryReport = null;
        entryReport = new FeatureCWPAnalysisEntryReport("Global", "Feature analysis for complete dataset", folder?.Add("_global", "Global", "GlobalReport"), settings.purpose);

        if (log != null)
        {
            log.log("Making global dataset report");
        }

        foreach (var term in datasetStatsModel.terms.GetTokens())
        {
            FeatureCWPAnalysisSiteMetrics metrics = datasetStatsModel.QueryForTerm(term); // QueryTermGlobalLevel(term);
            SetMetrics(metrics);
            entryReport.Append(metrics, false);
        }

        entryReport.Save(log);
        globalReport = entryReport;
    }

    if (!categoryReports.Any() && settings.RequiredScopes.HasFlag(CWPAnalysisScopeEnum.categoryLevel))
    {
        if (folder != null)
        {
            foreach (SpaceDocumentStatsModel category in datasetStatsModel.Children)
            {
                folder.Add(category.name, category.name, "");
            }
        }

        if (log != null)
        {
            log.log("Making category level dataset reports");
        }

        foreach (var category in datasetStatsModel.Children)
        {
            FeatureCWPAnalysisEntryReport rp = null;

            if (folder != null)
            {
                rp = SubAnalysis(category, folder[category.name], log);
            }
            else
            {
                rp = SubAnalysis(category, null, log);
            }

            rp.Save(log, false);
            categoryReports.Add(rp);
        }
    }

    if (datasetReport == null && settings.RequiredScopes.HasFlag(CWPAnalysisScopeEnum.mainLevel))
    {
        if (log != null)
        {
            log.log("Making main level report");
        }

        datasetReport = new FeatureCWPAnalysisDatasetReport("Dataset", "Final report on the dataset", folder?.Add("_main", "main", ""), categoryReports);
        datasetReport.Save(log);
    }

    if (unitaryReport == null && settings.RequiredScopes.HasFlag(CWPAnalysisScopeEnum.unitaryLevel))
    {
        if (log != null)
        {
            log.log("Making unitary report");
        }

        unitaryReport = new FeatureCWPAnalysisEntryReport("Fused report", "Cross-category report with MAX(particularity) and MAX(commonality)", folder?.Add("_unitary", "Unitary", ""), settings.purpose);

        foreach (var pair in categoryReports)
        {
            foreach (System.Collections.Generic.KeyValuePair<string, FeatureCWPAnalysisSiteMetrics> e in pair)
            {
                SetMetrics(e.Value);
                unitaryReport.AddMerge(e.Value, false);
            }
        }

        unitaryReport.PostMerge();
        unitaryReport.Save(log, false);
    }

    if (frequencies == null && settings.RequiredScopes.HasFlag(CWPAnalysisScopeEnum.rawLevel))
    {
        if (log != null)
        {
            log.log("Making raw report");
        }

        frequencies = new FeatureCWPFrequencyDictionary();
        frequencies.Deploy(datasetStatsModel);
        frequencies.PublishTableBlocks(folder.Add("_freq", "Frequencies", "Absolute frequencies by scope"));
    }

    if (flatReport == null && settings.RequiredScopes.HasFlag(CWPAnalysisScopeEnum.flatSiteLevel))
    {
        if (log != null)
        {
            log.log("Making flat report");
        }

        flatReport = new FeatureCWPAnalysisEntryReport("Flat report", "Report produced as if all sites were in a single category", folder?.Add("_flat", "Flat", ""), settings.purpose);
        flatReport = SubAnalysis(flatDataSetStatsModel, flatReport.folder, log);
        flatReport.Save(log, false);
    }
}
public void Publish(folderNode folder, DirectedGraphWithSourceData InitialGraph)
{
    ColorGradientForInstanceEnumeration<String> chunkInstanceGradient = new ColorGradientForInstanceEnumeration<String>("#f7941d", "#6dcff6");
    chunkInstanceGradient.Prepare(items.Select(x => x.ExtractorName));

    if (InitialGraph == null)
    {
        return;
    }

    NodeGraph chunkRootGraph = BuildChunkRootsGraph();

    var chunkRootGraphNodes = InitialGraph.Nodes.Get(chunkRootGraph.getAllChildren().Select(x => x.path)).ToDictionary(x => x.Id);

    foreach (var gnp in chunkRootGraphNodes)
    {
        if (gnp.Value is Node)
        {
            gnp.Value.Background = "#999999";
            gnp.Value.StrokeDashArray = "2,5,2,5,2,5";
            gnp.Value.StrokeThinkness = 2;
        }
    }

    List<NodeGraph> TargetRootNodes = chunkRootGraph.getAllChildrenInType<NodeGraph>().Where(x => x.HasMetaData()).ToList();

    ListDictionary<ContentChunk, DirectedGraph> subgraphs = new ListDictionary<ContentChunk, DirectedGraph>();

    NodeDictionaryGraphStyleSettings style = new NodeDictionaryGraphStyleSettings();

    foreach (NodeGraph ng in TargetRootNodes)
    {
        var TargetRootNodesInContentGraph = InitialGraph.Select<NodeGraph>(new List<NodeGraph>() { ng }, x => x.path, true, true);

        ContentChunk chunk = ng.GetMetaData<ContentChunk>();
        var nodeColor = chunkInstanceGradient.GetColor(chunk.ExtractorName, true);

        foreach (var pair in TargetRootNodesInContentGraph)
        {
            if (pair.Key is Node node)
            {
                var allLinked = chunk.PublishAnnotation(InitialGraph, nodeColor, style);

                var sg = InitialGraph.GetSubgraph(allLinked.SelectMany(x => x));
                sg.Title = chunk.name;
                subgraphs[chunk].Add(sg);
            }
            else if (pair.Key is Link link)
            {
                // link.Stroke = nodeColor;
            }
        }
    }

    foreach (var pair in subgraphs)
    {
        Int32 grpi = 0;
        foreach (var grp in pair.Value)
        {
            String grpp = "chunk_subgraph" + pair.Key.name + grpi.ToString() + ".dgml";
            grp.Save(folder.pathFor(grpp, imbSCI.Data.enums.getWritableFileMode.overwrite), imbSCI.Data.enums.getWritableFileMode.overwrite);
            grpi++;
        }
    }

    InitialGraph.Save(folder.pathFor("CompleteGraph.dgml"), imbSCI.Data.enums.getWritableFileMode.overwrite);

    /*
    var chunkTargetRootNodes = InitialGraph.Nodes.Get(.Select(x => x.path)).ToDictionary(x => x.Id);
    foreach (var gnp in chunkTargetRootNodes)
    {
        if (gnp.Value is Node)
        {
            gnp.Value.Background = chunkInstanceGradient.GetColor(chunk.ExtractorName, true);
            gnp.Value.StrokeDashArray = "";
            gnp.Value.StrokeThinkness = 5;
        }
    }

    foreach (ContentChunk chunk in items)
    {
        var graphNodes = InitialGraph.Nodes.Get(chunk.ContentAnalysis.allContent.items.Select(x => x.XPath));
        var nodeColor = chunkInstanceGradient.GetColor(chunk.ExtractorName, true);
        var itemsByXPath = chunk.ContentAnalysis.allContent.items.ToDictionary(x => x.XPath);
        var graphNodeByXPath = chunk.ContentAnalysis.allContent.ContentGraph.getAllChildren().ToDictionary(x => x.path);

        foreach (var gn in graphNodes)
        {
            gn.Background = nodeColor;
        }

        var SelectedGraphNodes = InitialGraph.Nodes.Get(itemsByXPath.Keys).ToDictionary(x => x.Id);

        foreach (var gnp in SelectedGraphNodes)
        {
            gnp.Value.Background = nodeColor;
        }
    }
    */

    foreach (ContentChunk chunk in items)
    {
        String chunkStrictName = chunk.name.getCleanPropertyName();
        var subfolder = folder.Add(chunkStrictName, chunk.name, "Cluster group diagnostics");
        chunk.Publish(subfolder);
    }
}