/// <summary>
/// Creates a console plugin instance for stand-alone use.
/// </summary>
/// <param name="__name">Plugin name, stored in <c>name</c>.</param>
/// <param name="__help">Help text shown for this plugin.</param>
/// <param name="__output">Optional external log builder — currently not wired up; <c>prepare()</c> builds its own output. TODO confirm intent.</param>
protected aceConsolePluginBase(String __name, String __help = "", builderForLog __output = null)
{
    _consoleHelp = __help;
    name = __name;
    // NOTE(review): __output is accepted but never assigned; prepare() creates the output builder itself.
    prepare();
}
/// <summary>
/// Creates an index-engine plugin with the given identity and attaches its log builder to the console output.
/// </summary>
/// <param name="__name">Plugin name, also used as the console output channel label.</param>
/// <param name="__description">Human-readable plugin description.</param>
protected indexPlugIn_base(string __name, string __description) : base()
{
    description = __description;
    name = __name;

    // Dedicated log builder, registered under this plugin's name.
    loger = new builderForLog();
    aceLog.consoleControl.setAsOutput(loger, name);
}
/// <summary>
/// Builds the task machine for crawling a sample of web sites: wires reporting, logging,
/// the task collection, performance probes and plugin collections.
/// </summary>
/// <param name="__tRecord">Spider test record this machine executes against.</param>
/// <param name="sample">Web site profiles forming the crawl sample.</param>
/// <param name="__reporter">Reporter receiving analytic output.</param>
/// <param name="__folder">Working folder for the web-loader controller.</param>
public crawlerDomainTaskMachine(modelSpiderTestRecord __tRecord, List <webSiteProfile> sample, directAnalyticReporter __reporter, folderNode __folder)
{
    tRecord = __tRecord;
    reporter = __reporter;
    folder = __folder;

    // Logging channel named after the test record.
    logger = new builderForLog();
    aceLog.consoleControl.setAsOutput(logger, tRecord.name);

    SetWebLoaderControler(__folder);

    // Per-domain crawl tasks for the whole sample.
    items = new crawlerDomainTaskCollection(tRecord, sample, this);

    // Performance probes — take an initial reading so later deltas are meaningful.
    cpuTaker = new performanceCpu(tRecord.name);
    dataLoadTaker = new performanceDataLoad(tRecord.name);
    measureTaker = new performanceResources(tRecord.name, this);
    cpuTaker.take();
    dataLoadTaker.take();
    measureTaker.take();

    // Expose the probes on the test record as well.
    tRecord.cpuTaker = cpuTaker;
    tRecord.dataLoadTaker = dataLoadTaker;
    tRecord.measureTaker = measureTaker;

    plugins = new enginePlugInCollection(this);
    reportPlugins = new reportingPlugInCollection(reporter, this);
}
/// <summary>
/// Loads every cached per-domain (DLC) TF-IDF table found in the construct folder.
/// </summary>
/// <param name="loger">Optional log builder; when supplied, progress is reported to it.</param>
/// <returns>All compiled DLC weight tables found in the cache folder.</returns>
public List <weightTableCompiled> GetTFIDF_DLC_AllCached(builderForLog loger = null)
{
    List <weightTableCompiled> results = new List <weightTableCompiled>();

    // Cached tables follow the dlc_*.xml naming pattern.
    List <string> cachedFiles = TFIDF_ConstructFolder.findFiles("dlc_*.xml");

    if (loger != null)
    {
        loger.log("[" + cachedFiles.Count + "] DLC TFIDF files detected in the cache folder [" + TFIDF_ConstructFolder.path + "]");
    }

    int total = cachedFiles.Count;
    double ratio = 0;
    int index = 0;

    foreach (string filePath in cachedFiles)
    {
        index++;
        weightTableCompiled table = new weightTableCompiled(filePath, true, index.ToString("D5"));
        results.Add(table);

        // Progress as a percentage of files loaded so far.
        ratio = index.GetRatio(total);
        if (loger != null)
        {
            aceLog.consoleControl.writeToConsole(ratio.ToString("P2") + " ", loger, false, 0);
        }
    }

    return results;
}
/// <summary>
/// Creates a reporting plugin: sets identity, attaches a log builder to console output
/// and prepares the plugin home folder.
/// </summary>
/// <param name="__name">Plugin name, also used as the console output channel label.</param>
/// <param name="__description">Human-readable plugin description.</param>
public reportPlugIn_base(string __name, string __description)
{
    name = __name;
    description = __description;
    loger = new builderForLog();
    aceLog.consoleControl.setAsOutput(loger, name);
    // FIX: corrected typo "pluting" -> "plugin" in the folder description string.
    // NOTE(review): __homePath is not a parameter here — presumably a field/property of the base type; confirm.
    homeFolder = new folderNode(__homePath, "Home folder of plugin: " + __name, "Internal data for plugin " + __name + ". " + __description);
}
/// <summary>
/// Called after the repository is loaded: opens the append-mode log file and
/// the site index table persisted next to it.
/// </summary>
public override void OnLoaded()
{
    // Append to the existing log.txt so earlier sessions are preserved.
    loger = new builderForLog(folder.pathFor("log.txt"), true, getWritableFileMode.appendFile);
    loger.log("Repository [" + name + "] accessed");

    // Persistent index of all stored MCWebSite entries, keyed by domain.
    siteTable = new objectTable <imbMCWebSiteEntry>(folder.pathFor("siteTable.xml"), true, nameof(imbMCWebSiteEntry.domain), "siteTable");
    siteTable.description = "Index datatable with all stored MCWebSite repo-entries";
}
/// <summary>
/// Triggers type discovery for the NLP model, lemma-constructor and ITM-constructor managers,
/// logging progress to a temporary "nlp" console channel.
/// </summary>
public void prepare()
{
    builderForLog log = new builderForLog();
    imbSCI.Core.screenOutputControl.logToConsoleControl.setAsOutput(log, "nlp");

    modelTypeManager.LoadTypes(log);
    webLemmaConstructorTypeManager.LoadTypes(log);
    itmConstructorTypeManager.LoadTypes(log);
}
/// <summary>
/// Initializes the console: builds the command menu from this instance and the component,
/// and creates per-console output/response log builders named after the concrete type.
/// </summary>
/// <param name="component">Component whose commands populate the menu.</param>
protected virtual void init(IAceComponent component)
{
    commands = new aceMenu();
    commands.setItems(this, component);

    Type selfType = GetType();
    output = new builderForLog(selfType.Name + "_output", false);
    response = new builderForLog(selfType.Name + "_response", false);

    consoleIsRunning = true;
}
/// <summary>
/// Initializes a new instance of the <see cref="pipelineModelExecutionContext"/> class:
/// routes the model's exit/trash bins into local collections, stamps start times and
/// attaches a logger under the "ExContext" console channel.
/// </summary>
/// <param name="__model">The pipeline model this context executes.</param>
public pipelineModelExecutionContext(IPipelineModel __model)
{
    model = __model;

    // Subjects leaving the pipeline are collected locally.
    model.exitBin.SetRoute(exitSubjects);
    model.trashBin.SetRoute(trashSubjects);

    startTime = DateTime.Now;
    lastStatusUpdate = DateTime.Now;

    logger = new builderForLog();
    screenOutputControl.logToConsoleControl.setAsOutput(logger as IConsoleControl, "ExContext");
}
/// <summary>
/// Verifies that a previously saved, modified experiment setup ("experimentTest2")
/// deserializes from the diagnostic folder with its modified description intact.
/// </summary>
public void TestModificationLoad()
{
    folderNode folder = new folderNode();
    folder = folder.Add("diagnostic", "Diagnostic", "Directory used for testing");

    var log = new builderForLog();
    String filePath = folder.pathFor("experimentTest2" + ".xml");

    var loaded = objectSerialization.loadObjectFromXML <experimentSetup>(filePath, log);

    Assert.AreEqual("MOD", loaded.description);
}
/// <summary>
/// Discovers crawler and crawler-plugin types, reporting to a temporary "wemTypes"
/// console channel that is detached again once loading completes.
/// </summary>
public void loadPlugins()
{
    builderForLog log = new builderForLog();
    imbSCI.Core.screenOutputControl.logToConsoleControl.setAsOutput(log, "wemTypes");

    crawlerTypes = new wemCrawlerTypeManager();
    crawlerTypes.LoadTypes(log);

    crawlPluginTypes = new wemCrawlerPluginTypeManager();
    crawlPluginTypes.LoadTypes(log);

    // Channel is only needed during discovery.
    imbSCI.Core.screenOutputControl.logToConsoleControl.removeFromOutput(log);
}
/// <summary>
/// Sums the TF-IDF weight ("information prize") of every specified term that matches
/// an entry in the compiled global TF-IDF table.
/// </summary>
/// <param name="terms">Terms to score.</param>
/// <param name="loger">The loger (currently unused by this method).</param>
/// <returns>Total tf_idf of matched terms; 0 when no global table is available.</returns>
public double GetInfoPrizeForTerms(IEnumerable <string> terms, builderForLog loger = null)
{
    // Guard: without a compiled global table there is nothing to score.
    if (globalTFIDFCompiled == null)
    {
        return 0;
    }

    double total = 0;
    List <IWeightTableTerm> matches = globalTFIDFCompiled.GetMatches(terms);
    foreach (weightTableTermCompiled match in matches)
    {
        total += match.tf_idf;
    }
    return total;
}
/// <summary>
/// Writes a grouped listing of installed plugins to the given log builder:
/// one header per non-empty group followed by the plugin names it contains.
/// </summary>
/// <param name="loger">Log builder receiving the listing.</param>
public void ToString(builderForLog loger)
{
    foreach (plugInGroupEnum group in Keys)
    {
        // Skip empty groups entirely.
        if (this[group].Count == 0)
        {
            continue;
        }

        loger.AppendLine("--- " + group.ToString() + " [" + this[group].Count + "]");
        foreach (IPlugInCommonBase plugin in this[group])
        {
            loger.AppendLine(plugin.name);
        }
    }
}
/// <summary>
/// Deprecated. Instantiates a plugin by its class name from the support-engine settings,
/// logs which engine category it belongs to, and optionally installs it into a collection.
/// </summary>
/// <param name="plugin_className">Name of the plugin class.</param>
/// <param name="loger">Log builder receiving status messages.</param>
/// <param name="collection">Collection the new instance is installed into (may be null).</param>
/// <returns>The created plugin instance, or null when the class name is unknown.</returns>
public static plugIn_base GetPluginInstance(string plugin_className, builderForLog loger, IAPlugInCollectionBase collection)
{
    plugIn_base plug = null;
    if (imbWEMManager.settings.supportEngine.plugins.Keys.Contains(plugin_className))
    {
        plug = imbWEMManager.settings.supportEngine.plugins[plugin_className].getInstance() as plugIn_base;

        // Category is reported for logging only; all branches behave the same otherwise.
        if (plug is indexPlugIn_base)
        {
            // FIX: removed unused local `plug_indexPlugIn_base` (was assigned, never read).
            loger.log("Plugin instance [" + plug.name + "] for Index Engine created");
        }
        else if (plug is enginePlugIn_base)
        {
            loger.log("Plugin instance [" + plug.name + "] for Crawl Job Engine created");
            //imbWEMManager.index.plugins.installPlugIn(plug as IPlugInCommonBase);
        }
        else if (plug is crawlerPlugIn_base)
        {
            loger.log("Plugin instance [" + plug.name + "] for Crawler created");
            //imbWEMManager.index.plugins.installPlugIn(plug as IPlugInCommonBase);
        }
        else if (plug is reportPlugIn_base)
        {
            loger.log("Plugin instance [" + plug.name + "] for Reporting created");
            // imbWEMManager.index.plugins.installPlugIn(plug as IPlugInCommonBase);
        }
        else
        {
            loger.log("Plugin instance [" + plug.name + "] of unknown category created... ");
        }

        if (collection != null)
        {
            collection.installPlugIn(plug);
        }
    }
    else
    {
        loger.AppendLine("Plugin [" + plugin_className + "] not found... ");
    }
    return plug;
}
/// <summary>
/// Renders a per-fold, per-category listing of training ("[T]") and evaluation ("[E]")
/// cases into <paramref name="modelNotes"/>, storing the rendered text and its MD5 hash
/// in <c>SampleDistributionNote</c> / <c>SampleDistributionHash</c>.
/// </summary>
/// <param name="modelNotes">Log builder to render into; a fresh one is created when null.</param>
/// <returns>The rendered sample-distribution text.</returns>
public String DescribeSampleDistribution(ILogBuilder modelNotes)
{
    if (modelNotes == null)
    {
        modelNotes = new builderForLog();
    }

    // Remember where our content starts so only the new text is extracted below.
    var startLength = modelNotes.Length;

    foreach (var foldCase in GetCases())
    {
        modelNotes.AppendHeading("Fold: " + foldCase.name, 2);

        // Collect case identifiers per category, tagged by their role.
        var casesByCategory = new aceDictionarySet <String, String>();
        foreach (validationCaseCollection caseCollection in foldCase.trainingCases)
        {
            foreach (string caseId in caseCollection)
            {
                casesByCategory.Add(caseCollection.className, "[T] " + caseId);
            }
        }
        foreach (validationCaseCollection caseCollection in foldCase.evaluationCases)
        {
            foreach (string caseId in caseCollection)
            {
                casesByCategory.Add(caseCollection.className, "[E] " + caseId);
            }
        }

        foreach (var category in casesByCategory.Keys)
        {
            modelNotes.AppendHeading("Category: " + category, 3);
            foreach (var entry in casesByCategory[category])
            {
                modelNotes.AppendLine(entry);
            }
        }
    }

    SampleDistributionNote = modelNotes.GetContent(startLength);
    SampleDistributionHash = md5.GetMd5Hash(SampleDistributionNote);
    return SampleDistributionNote;
}
/// <summary>
/// Called when the object is loaded: attaches logging, loads the industry class model,
/// deploys each experiment and opens the composite template from the working folder.
/// </summary>
public override void OnLoaded()
{
    logger = new builderForLog();
    aceLog.consoleControl.setAsOutput(logger, name);

    industries.OnLoad <industryClassModel>(folder, logger);

    experiment.ForEach(x => x.deploy());

    compositeTemplate = new experimentCompositeTemplate(folder);
}
/// <summary>
/// Round-trips the default experiment setup through XML serialization in the diagnostic
/// folder and verifies name, description and semantic extractor count survive.
/// </summary>
public void TestExperimentSetupLoadSave()
{
    folderNode folder = new folderNode();
    folder = folder.Add("diagnostic", "Diagnostic", "Directory used for testing");

    var log = new builderForLog();

    // Arrange: a default setup with known identity.
    var setup = experimentSetup.GetDefaultExperimentSetup();
    setup.name = "experimentTest";
    setup.description = "testing experiment load and save";

    // Act: save, then load back from the same path.
    String path = folder.pathFor(setup.name + ".xml");
    objectSerialization.saveObjectToXML(setup, path);
    var reloaded = objectSerialization.loadObjectFromXML <experimentSetup>(path, log);

    // Assert: key properties round-trip intact.
    Assert.AreEqual(setup.name, reloaded.name);
    Assert.AreEqual(setup.description, reloaded.description);
    Assert.AreEqual(setup.featureVectorExtractors_semantic.Count, reloaded.featureVectorExtractors_semantic.Count);
}
/// <summary>
/// Creates the pipeline model: optionally overrides the name, builds the exit/trash bins
/// and ensures a usable logger exists (creating one when none is supplied).
/// </summary>
/// <param name="_logger">External logger; when null a fresh builder is created and attached to console output.</param>
/// <param name="__name">Optional model name override.</param>
protected pipelineModel(ILogBuilder _logger = null, String __name = "") : base()
{
    if (!__name.isNullOrEmpty())
    {
        name = __name;
    }

    exitBin = new pipelineNodeBin <T>("ExitBin", this);
    trashBin = new pipelineNodeBin <T>("TrashBin", this);

    if (_logger == null)
    {
        builderForLog __logger = new builderForLog();
        logger = __logger;
        imbSCI.Core.screenOutputControl.logToConsoleControl.setAsOutput(__logger, name);
        // BUG FIX: removed trailing `logger = _logger;` which re-assigned logger to null
        // here (this branch only runs when _logger == null), discarding the freshly
        // created log builder.
    }
    else
    {
        logger = _logger;
    }
}
/// <summary>
/// Runs the macro. To adjust macro execution override <see cref="innerRun(analyticJob, analyticJobRunFlags, analyticProject, builderForLog)"/> method.
/// <c>innerRun</c> is called within this method, after the common initialization procedure.
/// </summary>
/// <remarks>
/// <para>Report creation is done after <see cref="innerRun(analyticJob, analyticJobRunFlags, analyticProject, builderForLog)"/> call.</para>
/// <para>NOTE(review): the entire execution/reporting body below is commented out, so this
/// method currently does nothing and always returns null. Confirm whether this is a
/// deliberate decommissioning or work-in-progress.</para>
/// </remarks>
/// <param name="aJob">a job.</param>
/// <param name="aFlags">a flags.</param>
/// <param name="aProject">a project.</param>
/// <param name="aTerminal">a terminal.</param>
/// <returns>Always null in the current state of the code.</returns>
public deliveryInstance run(analyticJob aJob, analyticJobRunFlags aFlags, analyticProject aProject = null, builderForLog aTerminal = null)
{
    // NOTE(review): runstamp is computed but unused while the body below stays disabled.
    string runstamp = aJob.runstamp;
    //aFlags = aFlags.SetFlag<analyticJobRunFlags>(analyticJobRunFlags.report_FolderPurge, imbWEMManager.settings.postReportEngine.reportPurgeFolder);
    //bool projectCreated = false;
    //if ((aProject == null) || aFlags.HasFlag(analyticJobRunFlags.setup_sciProjectFromPreset))
    //{
    //    //aProject = new analyticProject();
    //    //aProject.afterLoadDeploy();
    //    //projectCreated = true;
    //}
    //analyticJobRecord aRecord = new analyticJobRecord(aJob, aProject, aFlags);
    //aceLog.consoleControl.setAsOutput(aRecord, "aRecord");
    //// <---------- counting the sample
    //imbWEMManager.webProfileGroups.setGroupCounts(aProject.mainWebProfiler.webSiteProfiles);
    //if (projectCreated) aRecord.logBuilder.log("SciProject new instance (byFlag) ::" + aProject.GetType().Name + " with defaults");
    // Analytic macro script [" + this.GetType().Name + "] execution started");
    //if (aTerminal != null)
    //{
    //    //aceLog.logBuilderRegistry.Add(logOutputSpecial.systemMainLog, aTerminal);
    //    logSystem.externalLoger = aTerminal;
    //}
    //// ---------------------------------------------------------------- INITIATION SECTION
    ////-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
    //if (!aFlags.HasFlag(analyticJobRunFlags.execution_skipTest))
    //{
    //    aRecord.logBuilder.open("tag", "Execution: " + GetType().Name, "the system initial self-configuration");
    //    ////// ---------------------------------------------------- INNER RUN CALL
    //    innerRun(aJob, aFlags, aProject, aRecord);
    //    ////// ---------------------------------------------------- INNER RUN CALL
    //    aRecord.logBuilder.close();
    //    ///// inner run called
    //} else
    //{
    //    aRecord.logBuilder.log("The macro script never executed :: " + analyticJobRunFlags.execution_skipTest + " default instance created");
    //}
    //// <---------- Record is finished
    //aRecord.recordFinish();
    //// ---------------------------------------------------------------- REPORTING SECTION
    //aRecord.logBuilder.log("Report construction initiated");
    //// -- create deliveryInstance
    //executeOtherCommons(aRecord);
    //metaDocumentRootSet aReport = executeBuildReport(aRecord);
    // deliveryInstance reportDeliveryInstance = executeRenderReport(aReport, aRecord);
    return(null); // reportDeliveryInstance;
}
/// <summary>
/// Links the term model's lemmas into the semantic lexicon graph in three passes:
/// (1) synonym-lemma to lemma links, (2) WordNet concept to lemma links,
/// (3) concept to concept links for concepts sharing lemmas. Changes are persisted
/// via the lexicon context, and a textual link log is saved to the project links folder.
/// </summary>
/// <param name="termModel">Model whose lemma form, synonyms and primary symsets drive the linking; its link counters are updated.</param>
/// <param name="saveModel">When true, the term model itself is saved after linking.</param>
public void addSynonymsAndConceptLinks(termExploreModel termModel, bool saveModel = false)
{
    // <----------- ADDING SYNONYMS ----------- >
    var lemmas = manager.getLemma(termModel.lemmaForm);
    var lemmasyns = manager.getLemmas(termModel.synonyms);
    builderForLog linkLog = new builderForLog();
    linkLog.open("Creating synonym-2-lemma links");
    termModel.links_synonym = 0;
    foreach (ITermLemma lemma in lemmas)
    {
        foreach (ITermLemma lemsyn in lemmasyns)
        {
            bool added = false;
            // Only create the link if it does not already exist in either direction.
            if (!lemma.relatedTo.Contains(lemsyn))
            {
                if (!lemma.relatedFrom.Contains(lemsyn))
                {
                    lemma.relatedTo.Add(lemsyn);
                    added = true;
                }
            }
            if (added)
            {
                termModel.links_synonym++;
                linkLog.AppendLine("[" + termModel.links_synonym.ToString("D5") + "] " + lemma.name + " -> " + lemsyn.name);
            }
            else
            {
                linkLog.AppendLine("[Link exists] " + lemma.name + " -> " + lemsyn.name);
            }
        }
    }
    linkLog.close();
    linkLog.open("Creating concept 2 lemma links");
    // <----------- ADDING SYNSETS
    List <Concept> concepts = new List <Concept>();
    // NOTE(review): conHead is assigned in the loop below but never read afterwards.
    Concept conHead = null;
    termModel.links_lemmaConcept = 0;
    foreach (string code in termModel.wordnetPrimarySymsets)
    {
        // NOTE(review): codes that look like clean words are rejected — presumably symset
        // codes are non-word identifiers; confirm the condition is not inverted.
        if (code.isCleanWord())
        {
            aceLog.log("wrong symset code -- [" + code + "] -- ignored!");
            continue;
        }
        Concept con = manager.getConcept(code, true, "WordNet Code");
        bool added = false;
        foreach (TermLemma lemma in lemmas)
        {
            if (!con.lemmas.Contains(lemma))
            {
                con.lemmas.Add(lemma);
                added = true;
            }
            // NOTE(review): `added` is not reset per lemma, so once one link is created every
            // following lemma in this concept is counted/logged as newly added — confirm intent.
            if (added)
            {
                termModel.links_lemmaConcept++;
                linkLog.AppendLine("[" + termModel.links_lemmaConcept.ToString("D5") + "] " + con.name + " -> " + lemma.name);
            }
            else
            {
                linkLog.AppendLine("[Link exists] " + con.name + " -> " + lemma.name);
            }
        }
        concepts.Add(con);
        conHead = con;
    }
    linkLog.close();
    linkLog.open("Creating concept 2 concept links");
    // <--------------------------- linking SYNSET concepts
    termModel.links_conceptConcept = 0;
    foreach (Concept con in concepts)
    {
        foreach (Concept con2 in concepts)
        {
            bool added = false;
            if (!con2.relatedTo.Contains(con))
            {
                if (!con2.relatedFrom.Contains(con))
                {
                    // Concepts are related only when they share at least one lemma.
                    var sharedLemmas = con2.lemmas.Where(x => con.lemmas.Contains(x));
                    if (sharedLemmas.Count() > 0)
                    {
                        con2.relatedTo.Add(con);
                        added = true;
                    }
                }
            }
            if (added)
            {
                termModel.links_conceptConcept++;
                linkLog.AppendLine("[" + termModel.links_conceptConcept.ToString("D5") + "] " + con2.name + " -> " + con.name);
            }
            else
            {
                linkLog.AppendLine("[Link exists] " + con2.name + " -> " + con.name);
            }
        }
    }
    linkLog.close();
    // Persist the graph changes, then save the human-readable link log.
    manager.lexiconContext.SaveChanges();
    string pth = projectFolderStructure[lexiconConstructorProjectFolder.links].pathFor(termModel.filename(".txt"));
    linkLog.ToString().saveStringToFile(pth, getWritableFileMode.overwrite);
    if (saveModel)
    {
        saveTermModel(termModel);
    }
}
/// <summary>
/// Performs full domain reevaluation: re-scores every loaded page of the domain against
/// the master TF-IDF table (terms, lemmas, information prize), updates the page index,
/// then aggregates the results onto the domain index entry.
/// </summary>
/// <param name="settings">The index engine settings (optimized mode can skip pages already scored).</param>
/// <param name="loger">The loger. NOTE(review): null-checked in some places but dereferenced
/// unconditionally in the per-page AppendLine below — passing null would throw there; confirm
/// callers always supply a logger.</param>
/// <param name="__wRecord">The w record (spider site record for the domain; disposed at the end).</param>
/// <param name="evaluator">The evaluator used when page terms must be recomputed.</param>
/// <param name="mTFIDF">Master TF-IDF table pages are matched against.</param>
public void doDomainEvaluation(IndexEngineConfiguration settings, builderForLog loger, modelSpiderSiteRecord __wRecord, multiLanguageEvaluator evaluator, weightTableCompiled mTFIDF)
{
    indexDomain idomain = null;
    //lock (updateIndexLockD)
    //{
    idomain = imbWEMManager.index.domainIndexTable.GetDomain(__wRecord.domainInfo.domainName);
    // }
    idomain.url = __wRecord.domain;
    //if (mTFIDF == null) mTFIDF = GetTFIDF_Master(loger, true, true);
    // Domain-level accumulators gathered over all pages.
    double dIP = 0;
    int p = 0;
    List <string> dTerms = new List <string>();
    List <string> dDistinctTerms = new List <string>();
    List <string> dLemmas = new List <string>();
    List <string> dWords = new List <string>();
    List <string> urls = new List <string>();
    // Stays true only if every page was (re)evaluated in this run.
    bool doEvalD = true;
    foreach (spiderTarget target in __wRecord.context.targets.GetLoaded())
    {
        indexPage ipage = null;
        // lock (updateIndexLock)
        // {
        ipage = imbWEMManager.index.deployTarget(target, __wRecord, idomain);
        // }
        bool doEval = true;
        // NOTE(review): dLc is declared but never used.
        int dLc = 0;
        if (settings.plugIn_indexDBUpdater_optimizedMode)
        {
            // In optimized mode, skip pages that already carry a full, relevant evaluation —
            // unless their word/lemma caches are missing.
            if ((ipage.InfoPrize > 0) && (ipage.Lemmas > 0) && (ipage.relevancyText == nameof(indexPageRelevancyEnum.isRelevant)))
            {
                doEval = false;
                if (ipage.AllWords.isNullOrEmpty())
                {
                    doEval = true;
                }
                if (ipage.AllLemmas.isNullOrEmpty())
                {
                    doEval = true;
                }
            }
        }
        if (doEval)
        {
            // Reuse cached words when present; otherwise extract them from the page.
            List <string> terms = new List <string>();
            if (ipage.AllWords.isNullOrEmpty())
            {
                terms = GetTermsForPage(target, idomain, ipage, evaluator, loger);
            }
            else
            {
                terms = ipage.AllWords.SplitSmart(",", "", true);
            }
            ipage.AllWords = terms.toCsvInLine();
            double IP = 0;
            List <string> lemmas = new List <string>();
            List <IWeightTableTerm> mchl = mTFIDF.GetMatches(terms);
            if (ipage.AllLemmas.isNullOrEmpty())
            {
                // terms = GetTermsForPage(target, idomain, ipage, evaluator, loger);
                lemmas.AddRange(mchl.Select(x => x.nominalForm));
            }
            else
            {
                lemmas = ipage.AllLemmas.SplitSmart(",", "", true);
            }
            // Information prize = sum of tf-idf over matched terms; df == 1 terms are
            // collected as domain-distinct.
            foreach (weightTableTermCompiled cterm in mchl)
            {
                IP += cterm.tf_idf;
                //dTerms.AddUnique(cterm.nominalForm);
                if (cterm.df == 1)
                {
                    dDistinctTerms.AddUnique(cterm.nominalForm);
                }
            }
            ipage.InfoPrize = IP;
            dIP += IP;
            ipage.Lemmas = lemmas.Count;
            ipage.AllLemmas = lemmas.toCsvInLine();
            dWords.AddRange(terms);
            dLemmas.AddRange(lemmas);
            ipage.Note = "indexUpdate" + SessionID;
            // lock (updateIndexLockB)
            // {
            imbWEMManager.index.pageIndexTable.AddOrUpdate(ipage);
            // }
            // if (loger!=null) loger.AppendLine(String.Format("[{0,25}] [{1,70}] IP[{2,7}] LM[{3,6}]", idomain.domain, ipage.url.TrimToMaxLength(60), ipage.InfoPrize.ToString("F4"), ipage.Lemmas.ToString("D5")));
        }
        else
        {
            // Page kept its existing score; domain-level lists stay incomplete (doEvalD=false).
            dIP += ipage.InfoPrize;
            doEvalD = false;
            // if (loger != null) loger.AppendLine(String.Format("[{0,25}] [{1,70}] IP[{2,7}] LM[{3,6}]", " ^---- using existing ", ipage.url.TrimToMaxLength(60), ipage.InfoPrize.ToString("F4"), ipage.Lemmas.ToString("D5")));
        }
        urls.Add(ipage.url);
        p++;
        loger.AppendLine(string.Format("[{0,25}] [{1,70}] IP[{2,7}] LM[{3,6}]", idomain.domain, ipage.url.toWidthMaximum(60), ipage.InfoPrize.ToString("F4"), ipage.Lemmas.ToString("D5")));
        target.Dispose();
    }
    if (imbWEMManager.settings.indexEngine.plugIn_indexDBUpdater_updateDomainEntry)
    {
        if (!doEvalD)
        {
            // Some pages were skipped: take lemma count from the cached DLC TF-IDF table instead.
            var dlc_tf = imbWEMManager.index.experimentEntry.GetTFIDF_DLC(idomain);
            int dlc_c = dlc_tf.Count;
            idomain.TFIDFcompiled = (dlc_c > 0);
            idomain.Lemmas = dlc_c;
        }
        else
        {
            idomain.Lemmas = dLemmas.Count;
            idomain.Words = dWords.Count;
            idomain.TFIDFcompiled = (dLemmas.Count > 0);
            idomain.DistinctLemmas = dDistinctTerms.toCsvInLine();
            idomain.AllLemmas = dLemmas.toCsvInLine();
            idomain.AllWords = dWords.toCsvInLine();
        }
        idomain.InfoPrize = dIP;
        //if (doEvalD)
        // Relevancy statistics derived from page-index assertions over all visited URLs.
        var urlAssert = imbWEMManager.index.pageIndexTable.GetUrlAssertion(urls);
        idomain.relevantPages = urlAssert[indexPageEvaluationEntryState.isRelevant].Count;
        idomain.notRelevantPages = urlAssert[indexPageEvaluationEntryState.notRelevant].Count;
        idomain.detected = urlAssert[indexPageEvaluationEntryState.haveNoEvaluationEntry].Count;
        idomain.Crawled = urlAssert.certainty;
        idomain.RelevantContentRatio = urlAssert.relevant;
        string rpp = string.Format("[{0,25}] Pages [{1,10}] IP[{2,10}] LM[{3,10}]", idomain.domain, p, idomain.InfoPrize.ToString("F5"), idomain.Lemmas.ToString("D7"));
        if (loger != null)
        {
            loger.AppendLine(rpp);
        }
    }
    if (imbWEMManager.settings.indexEngine.plugIn_indexDBUpdater_updateDomainEntry)
    {
        imbWEMManager.index.domainIndexTable.AddOrUpdate(idomain);
    }
    imbWEMManager.index.wRecordsDeployed++;
    __wRecord.Dispose();
}
/// <summary>
/// Creates a file-oriented console plugin, starting with the process's current
/// working directory as its active folder.
/// </summary>
/// <param name="__name">Plugin name.</param>
/// <param name="__help">Help text for the plugin.</param>
/// <param name="__output">Optional output log builder, forwarded to the base constructor.</param>
public aceConsolePluginForFiles(string __name, string __help = "", builderForLog __output = null) : base(__name, __help, __output)
{
    // Start in the directory the process was launched from.
    folder = new DirectoryInfo(Directory.GetCurrentDirectory());
}
/// <summary>
/// Creates a crawl-job engine plugin and attaches its log builder to console output
/// under the plugin's name.
/// </summary>
protected enginePlugIn_base()
{
    loger = new builderForLog();
    aceLog.consoleControl.setAsOutput(loger, name);
}
/// <summary>
/// Sets the execution context: wires manager/setup/tools/classes, creates the experiment
/// folder tree and notes files, de-duplicates model names, builds feature vector
/// definitions and k-fold validation cases per model.
/// </summary>
/// <param name="_manager">The manager.</param>
/// <param name="_setup">The setup.</param>
/// <param name="_tools">The tools (this context is registered on them).</param>
/// <param name="_classes">The classes.</param>
/// <param name="sufix">The sufix appended to the experiment context name.</param>
/// <param name="chunker">The chunker.</param>
/// <param name="_masterExtractor">The master extractor. NOTE(review): this parameter is
/// currently ignored — the master extractor is taken from _setup (see commented line
/// below); confirm whether the parameter should be removed or re-wired.</param>
/// <param name="_logger">The logger; a fresh builder attached to console output is created when null.</param>
public void SetExecutionContext(experimentManager _manager, experimentSetup _setup, classifierTools _tools, DocumentSetClasses _classes, String sufix, chunkComposerBasic chunker, semanticFVExtractor _masterExtractor, ILogBuilder _logger = null)
{
    if (_logger == null)
    {
        _logger = new builderForLog();
        aceLog.consoleControl.setAsOutput(_logger, _setup.name);
    }
    logger = _logger;
    chunkComposer = chunker;
    setup = _setup;
    tools = _tools;
    tools.context = this;
    classes = _classes;
    // masterConstructor = _masterExtractor.termTableConstructor;
    masterExtractor = _setup.featureVectorExtractors_semantic.First();
    masterConstructor = masterExtractor.termTableConstructor;
    manager = _manager;
    // Experiment working folder and error-report subfolder.
    String expContextName = "exp_" + setup.name.add(sufix, "_");
    folder = manager.folder.Add(expContextName, "Experiment " + setup.name, "Directory with all information on the experiment [" + setup.name + "]");
    errorNotesFolder = folder.Add("errors", "Error logs", "Directory with error reports produced if an exception occours. Normally, if everything was ok this folder should have only two files inside: directory_readme.txt and empty: note.txt).");
    errorNotes = new experimentNotes(errorNotesFolder, "Notes (logs) about critical and non-critical errors that happen during experiment execution. If everything was ok - this file should remain empty");
    notes = new experimentNotes(folder, "Notes on experiment setup and execution log");
    aceLog.consoleControl.setAsOutput(notes, "Notes");
    notes.log("Experiment [" + expContextName + "] initiated");
    notes.AppendLine("About: " + setup.description);
    notes.AppendHorizontalLine();
    notes.SaveNote();
    notes.AppendHeading("Feature extraction models");
    var lnsc = chunkComposer.DescribeSelf();
    lnsc.ForEach(x => notes.AppendLine(x));
    notes.AppendLine(" - ");
    // First pass: make model names unique by suffixing duplicates with a counter.
    List <String> mdn = new List <string>();
    foreach (var md in setup.models)
    {
        if (mdn.Contains(md.name))
        {
            md.name += "_" + mdn.Count.ToString();
        }
        else
        {
            mdn.Add(md.name);
        }
    }
    // Second pass: build each model's feature vector definition and validation cases.
    foreach (var md in setup.models)
    {
        String prefix = md.name;
        md.classes = classes;
        md.BuildFeatureVectorDefinition();
        var lns = md.DescribeSelf();
        lns.ForEach(x => notes.AppendLine(x));
        kFoldValidationCollection validationCases = classes.BuildValidationCases(prefix, setup.validationSetup.k, tools.DoDebug, logger, folder, setup.validationSetup.randomize);
        validationCases.pipelineCollection = pipelineCollection;
        validationCases.connectContext(this, md);
        validationCollections.Add(md.name, validationCases);
        //md.postClassifiers = setup.classifiers;
    }
}
/// <summary>
/// Creates this console's output log builder and registers it under the console title.
/// </summary>
protected void prepare()
{
    _output = new builderForLog();
    aceLog.consoleControl.setAsOutput(_output, consoleTitle);
}
/// <summary>
/// Gets the domain-level (DLC) TF-IDF table: returns the cached compiled table when one
/// exists and reuse is allowed; otherwise builds it — either heuristically or by tokenizing
/// every relevant loaded page into a term-document set — optionally saving the result back
/// to the cache and updating the domain index entry.
/// </summary>
/// <param name="__wRecord">The w record (spider site record for the domain).</param>
/// <param name="loger">The loger (required; logged to throughout).</param>
/// <param name="__useExisting">if set to <c>true</c> [use existing cached table when present].</param>
/// <param name="__saveToCache">if set to <c>true</c> [save the compiled table to cache].</param>
/// <param name="evaluator">The evaluator; falls back to the test record's evaluator when null.</param>
/// <returns>The compiled DLC TF-IDF table for the domain.</returns>
public weightTableCompiled GetOrCreateTFIDF_DLC(modelSpiderSiteRecord __wRecord, builderForLog loger, bool __useExisting, bool __saveToCache, multiLanguageEvaluator evaluator = null)
{
    indexDomain idomain = imbWEMManager.index.domainIndexTable.GetOrCreate(__wRecord.domain);
    FileInfo TFIDF_DLC_File = GetTFIDF_DLC_File(idomain, getWritableFileMode.existing);
    weightTableCompiled TFIDF_DLC = null;
    // Fast path: load the cached compiled table when reuse is allowed.
    if (TFIDF_DLC_File.Exists && __useExisting)
    {
        TFIDF_DLC = new weightTableCompiled(TFIDF_DLC_File.FullName, true, idomain.domain + "_DLC_TF_IDF");
        loger.log("DLC TF-IDF[" + TFIDF_DLC.Count + "] cache found for: " + idomain.domain);
        return(TFIDF_DLC);
    }
    if (evaluator == null)
    {
        evaluator = __wRecord.tRecord.evaluator;
    } // <--------------- evaluator selection
    if (imbWEMManager.settings.TFIDF.doUseHeuristicDLCTFIDFConstruction)
    {
        TFIDF_DLC = GetOrCreateTFIDF_DLC_Heuristic(__wRecord, loger, __useExisting, __saveToCache, evaluator);
    }
    else
    {
        // Full construction: tokenize every relevant loaded page into a per-page term document.
        loger.log("DLC TF-IDF construction for: " + idomain.domain + " initiated.");
        termDocumentSet domainSet = new termDocumentSet("DomainTFIDF_source");
        var tLoaded = __wRecord.context.targets.GetLoaded();
        int tc = tLoaded.Count;
        int ti = 0;
        // NOTE(review): ts is declared but never used.
        int ts = 10;
        int c = 0;
        // input_c / output_c track token counts before/after semantic compression.
        int input_c = 0;
        int output_c = 0;
        double io_r = 0;
        foreach (spiderTarget target in tLoaded)
        {
            ti++;
            c++;
            double tp = ti.GetRatio(tc);
            if (target.IsRelevant)
            {
                var wordlist = GetTermsForPage(target, idomain, null, evaluator, loger);
                input_c += wordlist.Count;
                termDocument pageTF = domainSet.AddTable(target.pageHash) as termDocument;
                pageTF.expansion = 1;
                pageTF.AddTokens(wordlist, loger);
                output_c += pageTF.Count();
            }
            // Progress report roughly every 10 targets.
            if (c > 10)
            {
                c = 0;
                io_r = output_c.GetRatio(input_c);
                aceLog.consoleControl.writeToConsole("Pages processed [" + tp.ToString("P2") + "] Semantic compression rate: " + io_r.ToString("P2"), loger, false, 0);
            }
        }
        loger.log("[" + idomain.domain + "] preprocess finished. DLC TF-IDF terms [" + domainSet.CountAllDocuments() + "]");
        TFIDF_DLC = domainSet.AggregateDocument.GetCompiledTable(loger);
        TFIDF_DLC.name = "DLC-TFIDF " + idomain.domain;
    }
    idomain.Lemmas = TFIDF_DLC.Count;
    if (__saveToCache)
    {
        if (TFIDF_DLC.SaveAs(TFIDF_DLC_File.FullName, getWritableFileMode.overwrite))
        {
            loger.log("[" + idomain.domain + "] DLC TF-IDF compiled table cache saved to: " + TFIDF_DLC_File.FullName);
        }
        else
        {
            loger.log("[" + idomain.domain + "] DLC TF-IDF compiled table save failed");
        }
    }
    imbWEMManager.index.domainIndexTable.AddOrUpdate(idomain);
    return(TFIDF_DLC);
}
/// <summary>
/// Assigns the shared terminal log builder used by this type.
/// </summary>
/// <param name="__terminal">Log builder to use as the terminal.</param>
public static void setTerminal(builderForLog __terminal) => _terminal = __terminal;
/// <summary>
/// Stamps the index performance entry with the current session, crawl and setup
/// identity, then queries the domain index table.
/// </summary>
/// <param name="indexSessionEntry">Entry whose identity fields are populated.</param>
/// <param name="loger">The loger (currently unused by this method).</param>
/// <param name="crawlId">The crawl identifier (currently unused; CrawlID is set to a fixed label).</param>
public void evaluateIndexPerformance(indexPerformanceEntry indexSessionEntry, builderForLog loger, string crawlId)
{
    indexSessionEntry.SessionID = imbWEMManager.index.experimentManager.SessionID;
    indexSessionEntry.CrawlID = "[Index Evaluation]";
    indexSessionEntry.IndexRepository = imbWEMManager.index.current_indexID;
    indexSessionEntry.Start = DateTime.Now;
    indexSessionEntry.CrawlerHash = analyticConsole.mainAnalyticConsole.state.setupHash_crawler;
    indexSessionEntry.GlobalSetupHash = analyticConsole.mainAnalyticConsole.state.setupHash_global;

    // NOTE(review): result of GetDomains is discarded — presumably called for its side
    // effects (loading/refreshing the table); confirm.
    imbWEMManager.index.domainIndexTable.GetDomains(indexDomainContentEnum.any);
}
/// <summary>
/// Performs feature selection over the space model: when a usable filter exists, selected
/// features are taken from it; otherwise (or when the filter returns nothing) all known
/// tokens are accepted. The space model is then reduced to the selected feature set.
/// </summary>
/// <param name="context">The operation context holding the space model and receiving the selected features.</param>
/// <param name="log">The log.</param>
/// <param name="EnableSelection">When false, selection is bypassed and all tokens are accepted.</param>
public void FeatureSelection(OperationContext context, ILogBuilder log, Boolean EnableSelection = true)
{
    log.log("Feature selection [" + EnableSelection.ToString() + "]");

    // Selection requires a fully configured filter; otherwise fall back to accept-all.
    if (filter == null) { EnableSelection = false; }
    else if (filter.WeightModel == null) { EnableSelection = false; }
    else if (!filter.WeightModel.GlobalFactors.Any()) { EnableSelection = false; }

    if (!context.spaceModel.IsModelReady)
    {
        log.log("-- Feature selection function shouldn't be called before creation of the space model.");
    }

    List <KeyValuePair <string, double> > filter_result = new List <KeyValuePair <string, double> >();
    if (EnableSelection)
    {
        filter_result = filter.SelectFeatures(context.spaceModel, log, notes.folder);
    }

    // BUG FIX: filter was dereferenced unconditionally here (Describe/GetSignature),
    // throwing NullReferenceException in the filter == null case the method explicitly
    // tolerates above. Guarded access; signature falls back to "none".
    builderForLog textBuilder = new builderForLog();
    String filterSignature = "none";
    if (filter != null)
    {
        filter.Describe(textBuilder);
        filterSignature = filter.GetSignature();
    }
    context.SelectedFeatures = new WeightDictionary("FS_" + context.name, "Features selected by " + filterSignature + ". Info: " + textBuilder.GetContent());

    if (filter_result.Any())
    {
        foreach (var pair in filter_result)
        {
            context.SelectedFeatures.AddEntry(pair.Key, pair.Value);
        }
        //if (generalContext.reportOptions.HasFlag(PlanesReportOptions.report_selectedFeatures))
        //{
        //    notes.SaveDataTable(context.SelectedFeatures.MakeTable("selected_features", "Features selected for BoW construction", new List<string>() { filter.function.shortName }, generalContext.DictionaryReportLimit), notes.folder_corpus);
        //}
    }
    else
    {
        // No (or empty) selection: accept every known token with unit weight.
        String msg = "-- Feature selection function returned zero set. All features [" + context.spaceModel.terms_known_label.Count + "] are therefore accepted as selected.";
        context.SelectedFeatures.description += msg;
        log.log(msg);
        var tkns = context.spaceModel.GetTokens(true, false);
        foreach (var tkn in tkns)
        {
            context.SelectedFeatures.AddEntry(tkn, 1);
        }
    }

    // Reduce the space model only when selection actually removed something.
    if (context.SelectedFeatures.Count < context.spaceModel.terms_known_label.Count)
    {
        context.spaceModel.FilterSpaceModelFeatures(context.SelectedFeatures, log);
    }
    else
    {
        context.spaceModel.terms_unknown_label.FilterTokens(context.SelectedFeatures.GetKeys());
    }

    // BUG FIX: also guard the final WeightModel access against filter == null.
    filter?.WeightModel?.Dispose();
}
/// <summary>
/// The stage two exploration: loads the term models for a lemma and, for each model,
/// enriches it with Apertium synonyms and WordNet concepts, post-processes it, and
/// optionally saves it (failed models are collected separately).
/// </summary>
/// <param name="lemma">The lemma.</param>
/// <param name="response">The response. NOTE(review): this parameter is never used —
/// output goes to a locally created builderForLog; confirm intent.</param>
/// <param name="savemodel">if set to <c>true</c> [savemodel] — persists graph descriptions and the final model.</param>
/// <param name="debug">if set to <c>true</c> [debug] — dumps a per-model markdown log file.</param>
/// <param name="verbose">if set to <c>true</c> [verbose] — mirrors the stage log to the console under "stage2".</param>
/// <param name="task">Optional task; when set, its title is recorded as the model's last modifying stage.</param>
/// <returns>The model set; when no models were loaded, the lemma is recorded in missingLemmas.</returns>
public static termExploreModelSet exploreStageTwo(string lemma, ILogBuilder response, bool savemodel, bool debug, bool verbose, lexiconTaskBase task = null)
{
    lexiconConstructor constructor = semanticLexiconManager.manager.constructor;
    termExploreModelSet outset = semanticLexiconManager.manager.constructor.loadTermModels(lemma, true);
    // No models found for the lemma: record it as missing and bail out.
    if (!Enumerable.Any(outset))
    {
        outset.missingLemmas.Add(lemma);
        return(outset);
    }
    foreach (termExploreModel mod in outset)
    {
        builderForLog logout = new builderForLog();
        if (verbose)
        {
            aceLog.consoleControl.setAsOutput(logout, "stage2");
        }
        // Enrichment pass 1: synonyms via Apertium.
        termExploreModel model = getSynonymsWithApertium(mod, logout);
        string pt = model.lemma.gramSet.getPosType().ToString();
        if (savemodel)
        {
            // model.graph.saveDescription(constructor.projectFolderStructure[lexiconConstructorProjectFolder.logs].path, pt + "_related");
        }
        // Enrichment pass 2: WordNet concepts via Apertium.
        model = getSynonymsWithWordnetViaApertium(model, logout, true, false);
        if (savemodel)
        {
            model.graph.saveDescription(constructor.projectFolderStructure[lexiconConstructorProjectFolder.logs].path, pt + "_concepts");
            // model.graph.savePaths(constructor.projectFolderStructure[lexiconConstructorProjectFolder.logs].path, pt + "_concepts");
        }
        model.PostProcess();
        if (debug)
        {
            // Dump the full model description and stage log to a markdown file.
            model.ToString(logout, true, true);
            string fn = model.lemma.inputForm + "_" + pt + "_log.md";
            logout.ToString(false).saveStringToFile(constructor.projectFolderStructure[lexiconConstructorProjectFolder.logs].pathFor(fn), getWritableFileMode.overwrite);
        }
        if (verbose)
        {
            aceLog.consoleControl.removeFromOutput(logout);
        }
        if (savemodel)
        {
            if (task != null)
            {
                model.lastModifiedByStage = task.taskTitle;
            }
            else
            {
                model.lastModifiedByStage = "stageTwo-exploreProcedure";
            }
            // Successful models are persisted; failed ones are collected for the caller.
            if (!model.wasExploreFailed)
            {
                constructor.saveTermModel(model);
            }
            else
            {
                outset.failedModels.Add(model);
            }
        }
    }
    return(outset);
}