Example #1
0
 /// <summary>
 /// Creates a console plugin intended for stand-alone use.
 /// </summary>
 /// <param name="__name">Name assigned to the plugin.</param>
 /// <param name="__help">Help text stored for the console; empty by default.</param>
 /// <param name="__output">Accepted for signature compatibility; this constructor does not store it.</param>
 protected aceConsolePluginBase(String __name, String __help = "", builderForLog __output = null)
 {
     this.name         = __name;
     this._consoleHelp = __help;

     // NOTE(review): the assignment of __output is disabled, so the supplied log builder is ignored here.
     prepare();
 }
Example #2
0
        // public string description { get; set; } = "";



        // public bool IsEnabled { get; set; } = true;


        // public string name { get; set; } = "INDEX";

        /// <summary>
        /// Sets the plugin name and description, then creates the plugin log and
        /// registers it as a console output under the plugin name.
        /// </summary>
        /// <param name="__name">Name of the plugin.</param>
        /// <param name="__description">Description of the plugin.</param>
        protected indexPlugIn_base(string __name, string __description)
        {
            this.name        = __name;
            this.description = __description;

            this.loger = new builderForLog();
            aceLog.consoleControl.setAsOutput(loger, name);
        }
Example #3
0
        /// <summary>
        /// Builds the task machine for a test record: stores the collaborators, registers a log as
        /// console output, sets the web loader controller, creates the task collection, takes the
        /// first performance readings and creates the plugin collections.
        /// </summary>
        /// <param name="__tRecord">Test record this machine works for; it receives the performance takers.</param>
        /// <param name="sample">Web site profiles passed to the task collection.</param>
        /// <param name="__reporter">Reporter handed to the reporting plugin collection.</param>
        /// <param name="__folder">Folder stored on the machine and passed to the web loader controller setup.</param>
        public crawlerDomainTaskMachine(modelSpiderTestRecord __tRecord, List <webSiteProfile> sample, directAnalyticReporter __reporter, folderNode __folder)
        {
            tRecord  = __tRecord;
            reporter = __reporter;
            folder   = __folder;

            var recordName = tRecord.name;

            // Console output for this machine is registered under the record name.
            logger = new builderForLog();
            aceLog.consoleControl.setAsOutput(logger, recordName);

            SetWebLoaderControler(__folder);

            items = new crawlerDomainTaskCollection(tRecord, sample, this);

            // Performance takers: create all three, take an initial reading, then attach them to the record.
            cpuTaker      = new performanceCpu(recordName);
            dataLoadTaker = new performanceDataLoad(recordName);
            measureTaker  = new performanceResources(recordName, this);

            cpuTaker.take();
            dataLoadTaker.take();
            measureTaker.take();

            tRecord.cpuTaker      = cpuTaker;
            tRecord.dataLoadTaker = dataLoadTaker;
            tRecord.measureTaker  = measureTaker;

            plugins       = new enginePlugInCollection(this);
            reportPlugins = new reportingPlugInCollection(reporter, this);
        }
        /// <summary>
        /// Loads every cached DLC TF-IDF table found in the construct folder.
        /// </summary>
        /// <param name="loger">Optional log; when supplied, the number of detected files and the loading progress are reported.</param>
        /// <returns>One compiled weight table per file matching <c>dlc_*.xml</c>, in the order the files were found.</returns>
        public List <weightTableCompiled> GetTFIDF_DLC_AllCached(builderForLog loger = null)
        {
            List <string> cachedFiles = TFIDF_ConstructFolder.findFiles("dlc_*.xml");
            bool          reporting   = loger != null;

            if (reporting)
            {
                loger.log("[" + cachedFiles.Count + "] DLC TFIDF files detected in the cache folder [" + TFIDF_ConstructFolder.path + "]");
            }

            var tables = new List <weightTableCompiled>();
            int total  = cachedFiles.Count;

            for (int index = 0; index < total; index++)
            {
                // Tables are numbered from 1, formatted as a five-digit label.
                int ordinal = index + 1;
                tables.Add(new weightTableCompiled(cachedFiles[index], true, ordinal.ToString("D5")));

                double progress = ordinal.GetRatio(total);
                if (reporting)
                {
                    aceLog.consoleControl.writeToConsole(progress.ToString("P2") + " ", loger, false, 0);
                }
            }

            return tables;
        }
Example #5
0
 /// <summary>
 /// Sets the plugin name and description, creates the plugin log and registers it as a
 /// console output, then creates the plugin's home folder node.
 /// </summary>
 /// <param name="__name">Name of the plugin; also used in the home folder caption and description.</param>
 /// <param name="__description">Description of the plugin; appended to the home folder description.</param>
 public reportPlugIn_base(string __name, string __description)
 {
     name        = __name;
     description = __description;

     loger = new builderForLog();
     aceLog.consoleControl.setAsOutput(loger, name);

     // NOTE(review): __homePath is not a parameter of this constructor; presumably a member declared elsewhere - confirm.
     string folderCaption     = "Home folder of plugin: " + __name;
     string folderDescription = "Internal data for pluting " + __name + ". " + __description;

     homeFolder = new folderNode(__homePath, folderCaption, folderDescription);
 }
        /// <summary>
        /// Creates the repository log on <c>log.txt</c> (using the appendFile mode), logs the access,
        /// and creates the site table on <c>siteTable.xml</c>.
        /// </summary>
        public override void OnLoaded()
        {
            var logPath = folder.pathFor("log.txt");
            loger = new builderForLog(logPath, true, getWritableFileMode.appendFile);
            loger.log("Repository [" + name + "] accessed");

            // The third argument names the imbMCWebSiteEntry.domain member for the table.
            var tablePath = folder.pathFor("siteTable.xml");
            siteTable = new objectTable <imbMCWebSiteEntry>(tablePath, true, nameof(imbMCWebSiteEntry.domain), "siteTable");
            siteTable.description = "Index datatable with all stored MCWebSite repo-entries";
        }
Example #7
0
        /// <summary>
        /// Triggers type loading on the model, web-lemma constructor and ITM constructor type managers,
        /// passing each a fresh log that is registered as console output under the name "nlp".
        /// </summary>
        public void prepare()
        {
            var typeLog = new builderForLog();
            imbSCI.Core.screenOutputControl.logToConsoleControl.setAsOutput(typeLog, "nlp");

            modelTypeManager.LoadTypes(typeLog);
            webLemmaConstructorTypeManager.LoadTypes(typeLog);
            itmConstructorTypeManager.LoadTypes(typeLog);
        }
Example #8
0
        /// <summary>
        /// Initializes the console: builds the command menu for this console and the given component,
        /// creates the output and response logs named after the runtime type, and marks the console as running.
        /// </summary>
        /// <param name="component">Component passed to the menu together with this console.</param>
        protected virtual void init(IAceComponent component)
        {
            // Runtime type name, so derived consoles get their own log names.
            string typeName = GetType().Name;

            commands = new aceMenu();
            commands.setItems(this, component);

            output   = new builderForLog(typeName + "_output", false);
            response = new builderForLog(typeName + "_response", false);

            consoleIsRunning = true;
        }
        /// <summary>
        /// Creates an execution context for the given pipeline model: routes the model's exit and trash
        /// bins to this context's subject collections, stamps the start and last-status times, and
        /// registers a new log as console output under the name "ExContext".
        /// </summary>
        /// <param name="__model">The pipeline model to execute; its exit and trash bins are re-routed.</param>
        public pipelineModelExecutionContext(IPipelineModel __model)
        {
            model = __model;

            // Route both terminal bins of the model into this context.
            model.exitBin.SetRoute(exitSubjects);
            model.trashBin.SetRoute(trashSubjects);

            // Two separate clock reads, as the fields are stamped independently.
            startTime        = DateTime.Now;
            lastStatusUpdate = DateTime.Now;

            logger = new builderForLog();
            screenOutputControl.logToConsoleControl.setAsOutput(logger as IConsoleControl, "ExContext");
        }
Example #10
0
        /// <summary>
        /// Loads <c>experimentTest2.xml</c> from the "diagnostic" folder and checks that its
        /// description equals "MOD".
        /// </summary>
        public void TestModificationLoad()
        {
            folderNode diagnostics = new folderNode().Add("diagnostic", "Diagnostic", "Directory used for testing");
            var        loadLog     = new builderForLog();

            String setupPath = diagnostics.pathFor("experimentTest2.xml");

            var loaded = objectSerialization.loadObjectFromXML <experimentSetup>(setupPath, loadLog);

            Assert.AreEqual("MOD", loaded.description);
        }
Example #11
0
        /// <summary>
        /// Loads the crawler types and the crawler plugin types.
        /// </summary>
        /// <remarks>
        /// A temporary log is registered as console output under the name "wemTypes" for the duration
        /// of the loading and is always removed afterwards, including when type loading throws.
        /// </remarks>
        public void loadPlugins()
        {
            builderForLog logger = new builderForLog();

            imbSCI.Core.screenOutputControl.logToConsoleControl.setAsOutput(logger, "wemTypes");

            try
            {
                crawlerTypes = new wemCrawlerTypeManager();
                crawlerTypes.LoadTypes(logger);

                crawlPluginTypes = new wemCrawlerPluginTypeManager();
                crawlPluginTypes.LoadTypes(logger);
            }
            finally
            {
                // Unregister even on failure so the temporary log does not stay attached to the console.
                imbSCI.Core.screenOutputControl.logToConsoleControl.removeFromOutput(logger);
            }
        }
        /// <summary>
        /// Sums the TF-IDF values of the global compiled table entries that match the given terms.
        /// </summary>
        /// <param name="terms">Terms to match against the global compiled TF-IDF table.</param>
        /// <param name="loger">Not used by this method.</param>
        /// <returns>The TF-IDF sum of the matches, or 0 when no global compiled table is set.</returns>
        public double GetInfoPrizeForTerms(IEnumerable <string> terms, builderForLog loger = null)
        {
            if (globalTFIDFCompiled == null)
            {
                return 0;
            }

            double total = 0;
            List <IWeightTableTerm> matches = globalTFIDFCompiled.GetMatches(terms);

            for (int i = 0; i < matches.Count; i++)
            {
                // Each match is expected to be a compiled term; the cast throws otherwise.
                var compiled = (weightTableTermCompiled)matches[i];
                total += compiled.tf_idf;
            }

            return total;
        }
Example #13
0
        /// <summary>
        /// Writes the non-empty plugin groups to the log: one header line per group with its
        /// item count, followed by one line per plugin name.
        /// </summary>
        /// <param name="loger">Log that receives the listing.</param>
        public void ToString(builderForLog loger)
        {
            foreach (plugInGroupEnum gr in Keys)
            {
                var members = this[gr];

                // Empty groups are left out of the listing.
                if (members.Count <= 0)
                {
                    continue;
                }

                loger.AppendLine("--- " + gr.ToString() + " [" + members.Count + "]");

                foreach (IPlugInCommonBase plugin in members)
                {
                    loger.AppendLine(plugin.name);
                }
            }
        }
Example #14
0
        /// <summary>
        /// Deprecated. Creates an instance of the plugin registered under the given class name, logs
        /// which category it belongs to, and installs it into the collection when one is given.
        /// </summary>
        /// <param name="plugin_className">Name of the plugin class, used as key into the support engine's plugin registry.</param>
        /// <param name="loger">The log that receives the outcome message.</param>
        /// <param name="collection">Optional collection to install the created plugin into.</param>
        /// <returns>
        /// The created plugin, or <c>null</c> when the class name is not registered or the created
        /// instance is not a <c>plugIn_base</c>.
        /// </returns>
        public static plugIn_base GetPluginInstance(string plugin_className, builderForLog loger, IAPlugInCollectionBase collection)
        {
            if (!imbWEMManager.settings.supportEngine.plugins.Keys.Contains(plugin_className))
            {
                loger.AppendLine("Plugin [" + plugin_className + "] not found... ");
                return null;
            }

            plugIn_base plug = imbWEMManager.settings.supportEngine.plugins[plugin_className].getInstance() as plugIn_base;

            // The "as" cast yields null when the instance is missing or of another type; without this
            // guard the category logging below would dereference null and null would be installed.
            if (plug == null)
            {
                loger.AppendLine("Plugin [" + plugin_className + "] instance is not a plugIn_base... ");
                return null;
            }

            if (plug is indexPlugIn_base)
            {
                loger.log("Plugin instance [" + plug.name + "] for Index Engine created");
            }
            else if (plug is enginePlugIn_base)
            {
                loger.log("Plugin instance [" + plug.name + "] for Crawl Job Engine created");
            }
            else if (plug is crawlerPlugIn_base)
            {
                loger.log("Plugin instance [" + plug.name + "] for Crawler created");
            }
            else if (plug is reportPlugIn_base)
            {
                loger.log("Plugin instance [" + plug.name + "] for Reporting created");
            }
            else
            {
                loger.log("Plugin instance [" + plug.name + "] of unknown category created... ");
            }

            if (collection != null)
            {
                collection.installPlugIn(plug);
            }

            return plug;
        }
        /// <summary>
        /// Writes, for every validation fold, the training ([T]) and evaluation ([E]) cases grouped by
        /// category into the notes, then stores the newly written text and its MD5 hash.
        /// </summary>
        /// <param name="modelNotes">Log builder to write into; a new <c>builderForLog</c> is used when null.</param>
        /// <returns>The content taken from the notes starting at their length on entry, as stored in <c>SampleDistributionNote</c>.</returns>
        public String DescribeSampleDistribution(ILogBuilder modelNotes)
        {
            if (modelNotes == null)
            {
                modelNotes = new builderForLog();
            }

            // Remember where the notes ended on entry; the content is later read from this position.
            var startLength = modelNotes.Length;

            foreach (var fold in GetCases())
            {
                modelNotes.AppendHeading("Fold: " + fold.name, 2);

                var casesByCategory = new aceDictionarySet <String, String>();

                foreach (validationCaseCollection trainingSet in fold.trainingCases)
                {
                    foreach (string caseName in trainingSet)
                    {
                        casesByCategory.Add(trainingSet.className, "[T] " + caseName);
                    }
                }

                foreach (validationCaseCollection evaluationSet in fold.evaluationCases)
                {
                    foreach (string caseName in evaluationSet)
                    {
                        casesByCategory.Add(evaluationSet.className, "[E] " + caseName);
                    }
                }

                foreach (var category in casesByCategory.Keys)
                {
                    modelNotes.AppendHeading("Category: " + category, 3);

                    foreach (var entry in casesByCategory[category])
                    {
                        modelNotes.AppendLine(entry);
                    }
                }
            }

            SampleDistributionNote = modelNotes.GetContent(startLength);
            SampleDistributionHash = md5.GetMd5Hash(SampleDistributionNote);

            return SampleDistributionNote;
        }
        /// <summary>
        /// Called when the object is loaded: registers a new log as console output under the object's
        /// name, loads the industries from the folder, deploys every experiment entry and creates the
        /// composite template.
        /// </summary>
        public override void OnLoaded()
        {
            logger = new builderForLog();
            aceLog.consoleControl.setAsOutput(logger, name);

            industries.OnLoad <industryClassModel>(folder, logger);

            experiment.ForEach(entry => entry.deploy());

            compositeTemplate = new experimentCompositeTemplate(folder);

            // Loading of validation cases and the fallback to default industries
            // (when no industry classes exist) are currently disabled.
        }
Example #17
0
        /// <summary>
        /// Saves the default experiment setup to XML in the "diagnostic" folder, loads it back and
        /// checks that name, description and the semantic extractor count are preserved.
        /// </summary>
        public void TestExperimentSetupLoadSave()
        {
            folderNode diagnostics = new folderNode().Add("diagnostic", "Diagnostic", "Directory used for testing");
            var        loadLog     = new builderForLog();

            var original = experimentSetup.GetDefaultExperimentSetup();
            original.name        = "experimentTest";
            original.description = "testing experiment load and save";

            String xmlPath = diagnostics.pathFor(original.name + ".xml");
            objectSerialization.saveObjectToXML(original, xmlPath);

            var reloaded = objectSerialization.loadObjectFromXML <experimentSetup>(xmlPath, loadLog);

            Assert.AreEqual(original.name, reloaded.name);
            Assert.AreEqual(original.description, reloaded.description);
            Assert.AreEqual(original.featureVectorExtractors_semantic.Count, reloaded.featureVectorExtractors_semantic.Count);
        }
Example #18
0
        /// <summary>
        /// Creates the pipeline model with its exit and trash bins and sets up its logger.
        /// </summary>
        /// <param name="_logger">
        /// Logger to use. When <c>null</c>, a new <c>builderForLog</c> is created, registered as
        /// console output under the model name, and used as the logger.
        /// </param>
        /// <param name="__name">Optional model name; applied only when not null or empty.</param>
        protected pipelineModel(ILogBuilder _logger = null, String __name = "") : base()
        {
            if (!__name.isNullOrEmpty())
            {
                name = __name;
            }

            exitBin  = new pipelineNodeBin <T>("ExitBin", this);
            trashBin = new pipelineNodeBin <T>("TrashBin", this);

            if (_logger == null)
            {
                // No logger supplied: create a default one and keep it. The previous code
                // re-assigned the (null) parameter afterwards, leaving the model without a logger.
                builderForLog __logger = new builderForLog();
                imbSCI.Core.screenOutputControl.logToConsoleControl.setAsOutput(__logger, name);
                logger = __logger;
            }
            else
            {
                logger = _logger;
            }
        }
Example #19
0
        /// <summary>
        /// Entry point for running the macro. To adjust macro execution override the
        /// <see cref="innerRun(analyticJob, analyticJobRunFlags, analyticProject, builderForLog)"/> method.
        /// </summary>
        /// <remarks>
        /// <para>The execution body is currently disabled: the method only reads the job's run stamp and
        /// returns <c>null</c>. <c>innerRun</c> is not called and no report is created.</para>
        /// </remarks>
        /// <param name="aJob">The job; its run stamp is read, so it must not be null.</param>
        /// <param name="aFlags">Run flags; currently unused.</param>
        /// <param name="aProject">The project; currently unused.</param>
        /// <param name="aTerminal">The terminal log; currently unused.</param>
        /// <returns>Always <c>null</c>.</returns>
        public deliveryInstance run(analyticJob aJob, analyticJobRunFlags aFlags, analyticProject aProject = null, builderForLog aTerminal = null)
        {
            // Only live statement besides the return; it dereferences aJob.
            string stamp = aJob.runstamp;

            // The former pipeline is disabled: job record creation, sample counting, the innerRun call,
            // record finishing, and report building/rendering. No delivery instance is produced.
            return null;
        }
        /// <summary>
        /// Creates synonym-to-lemma, concept-to-lemma and concept-to-concept links for the term model,
        /// saves the lexicon context changes and writes the link log to the project's links folder.
        /// </summary>
        /// <param name="termModel">
        /// Term model whose lemma form, synonyms and primary WordNet synset codes are linked. Its
        /// <c>links_synonym</c>, <c>links_lemmaConcept</c> and <c>links_conceptConcept</c> counters are
        /// reset and then incremented once per newly created link.
        /// </param>
        /// <param name="saveModel">When <c>true</c>, the term model is saved after the links are created.</param>
        public void addSynonymsAndConceptLinks(termExploreModel termModel, bool saveModel = false)
        {
            var lemmas    = manager.getLemma(termModel.lemmaForm);
            var lemmasyns = manager.getLemmas(termModel.synonyms);

            builderForLog linkLog = new builderForLog();

            // <----------- ADDING SYNONYMS
            linkLog.open("Creating synonym-2-lemma links");

            termModel.links_synonym = 0;
            foreach (ITermLemma lemma in lemmas)
            {
                foreach (ITermLemma lemsyn in lemmasyns)
                {
                    // A link is created only when none exists in either direction.
                    bool added = false;
                    if (!lemma.relatedTo.Contains(lemsyn) && !lemma.relatedFrom.Contains(lemsyn))
                    {
                        lemma.relatedTo.Add(lemsyn);
                        added = true;
                    }

                    if (added)
                    {
                        termModel.links_synonym++;
                        linkLog.AppendLine("[" + termModel.links_synonym.ToString("D5") + "] " + lemma.name + " -> " + lemsyn.name);
                    }
                    else
                    {
                        linkLog.AppendLine("[Link exists] " + lemma.name + " -> " + lemsyn.name);
                    }
                }
            }
            linkLog.close();

            // <----------- ADDING SYNSETS
            linkLog.open("Creating concept 2 lemma links");

            List <Concept> concepts = new List <Concept>();

            termModel.links_lemmaConcept = 0;
            foreach (string code in termModel.wordnetPrimarySymsets)
            {
                // NOTE(review): codes for which isCleanWord() is true are treated as invalid synset codes - confirm intent.
                if (code.isCleanWord())
                {
                    aceLog.log("wrong symset code -- [" + code + "]  -- ignored!");
                    continue;
                }

                Concept con = manager.getConcept(code, true, "WordNet Code");

                foreach (TermLemma lemma in lemmas)
                {
                    // Reset per lemma: previously the flag was shared across the whole loop, so after the
                    // first new link every existing link was also counted and logged as new.
                    bool added = false;
                    if (!con.lemmas.Contains(lemma))
                    {
                        con.lemmas.Add(lemma);
                        added = true;
                    }

                    if (added)
                    {
                        termModel.links_lemmaConcept++;
                        linkLog.AppendLine("[" + termModel.links_lemmaConcept.ToString("D5") + "] " + con.name + " -> " + lemma.name);
                    }
                    else
                    {
                        linkLog.AppendLine("[Link exists] " + con.name + " -> " + lemma.name);
                    }
                }

                concepts.Add(con);
            }
            linkLog.close();

            // <--------------------------- linking SYNSET concepts
            linkLog.open("Creating concept 2 concept links");

            termModel.links_conceptConcept = 0;
            foreach (Concept con in concepts)
            {
                foreach (Concept con2 in concepts)
                {
                    // Concepts are linked when no link exists in either direction and they share a lemma.
                    // NOTE(review): con and con2 may be the same concept, which then links to itself - confirm intent.
                    bool added = false;
                    if (!con2.relatedTo.Contains(con) && !con2.relatedFrom.Contains(con))
                    {
                        if (con2.lemmas.Any(x => con.lemmas.Contains(x)))
                        {
                            con2.relatedTo.Add(con);
                            added = true;
                        }
                    }

                    if (added)
                    {
                        termModel.links_conceptConcept++;
                        linkLog.AppendLine("[" + termModel.links_conceptConcept.ToString("D5") + "] " + con2.name + " -> " + con.name);
                    }
                    else
                    {
                        linkLog.AppendLine("[Link exists] " + con2.name + " -> " + con.name);
                    }
                }
            }
            linkLog.close();

            manager.lexiconContext.SaveChanges();

            // The link log is written to the links folder under the term model's file name.
            string pth = projectFolderStructure[lexiconConstructorProjectFolder.links].pathFor(termModel.filename(".txt"));
            linkLog.ToString().saveStringToFile(pth, getWritableFileMode.overwrite);

            if (saveModel)
            {
                saveTermModel(termModel);
            }
        }
        /// <summary>
        /// Performs full domain reevaluation: deploys every loaded target of the site record into the
        /// page index, (re)computes page words, lemmas and info prize where needed, and optionally
        /// updates the domain index entry. Disposes each target and, at the end, the site record.
        /// </summary>
        /// <param name="settings">The settings; only <c>plugIn_indexDBUpdater_optimizedMode</c> is read from it. The domain-entry update flag is read from <c>imbWEMManager.settings.indexEngine</c>.</param>
        /// <param name="loger">The loger; may be null, in which case the progress lines written by this method are skipped.</param>
        /// <param name="__wRecord">The site record whose loaded targets are evaluated; disposed before returning.</param>
        /// <param name="evaluator">The evaluator, passed on when terms have to be extracted for a page.</param>
        /// <param name="mTFIDF">Compiled weight table used to match page terms; must not be null.</param>
        public void doDomainEvaluation(IndexEngineConfiguration settings, builderForLog loger, modelSpiderSiteRecord __wRecord, multiLanguageEvaluator evaluator, weightTableCompiled mTFIDF)
        {
            indexDomain idomain = imbWEMManager.index.domainIndexTable.GetDomain(__wRecord.domainInfo.domainName);

            idomain.url = __wRecord.domain;

            double        dIP            = 0;
            int           p              = 0;
            List <string> dDistinctTerms = new List <string>();
            List <string> dLemmas        = new List <string>();
            List <string> dWords         = new List <string>();
            List <string> urls           = new List <string>();

            // Stays true only when every page was re-evaluated; a single reused page switches the
            // domain entry to the cached DLC TF-IDF figures below.
            bool doEvalD = true;

            foreach (spiderTarget target in __wRecord.context.targets.GetLoaded())
            {
                indexPage ipage = imbWEMManager.index.deployTarget(target, __wRecord, idomain);

                bool doEval = true;

                if (settings.plugIn_indexDBUpdater_optimizedMode)
                {
                    if ((ipage.InfoPrize > 0) && (ipage.Lemmas > 0) && (ipage.relevancyText == nameof(indexPageRelevancyEnum.isRelevant)))
                    {
                        // An already evaluated, relevant page is reused unless its word or lemma list is missing.
                        doEval = ipage.AllWords.isNullOrEmpty() || ipage.AllLemmas.isNullOrEmpty();
                    }
                }

                if (doEval)
                {
                    List <string> terms;

                    if (ipage.AllWords.isNullOrEmpty())
                    {
                        terms = GetTermsForPage(target, idomain, ipage, evaluator, loger);
                    }
                    else
                    {
                        terms = ipage.AllWords.SplitSmart(",", "", true);
                    }

                    ipage.AllWords = terms.toCsvInLine();

                    List <IWeightTableTerm> mchl = mTFIDF.GetMatches(terms);

                    List <string> lemmas = new List <string>();

                    if (ipage.AllLemmas.isNullOrEmpty())
                    {
                        lemmas.AddRange(mchl.Select(x => x.nominalForm));
                    }
                    else
                    {
                        lemmas = ipage.AllLemmas.SplitSmart(",", "", true);
                    }

                    double IP = 0;

                    foreach (weightTableTermCompiled cterm in mchl)
                    {
                        IP += cterm.tf_idf;

                        // Terms with df == 1 are collected as the domain's distinct lemmas.
                        if (cterm.df == 1)
                        {
                            dDistinctTerms.AddUnique(cterm.nominalForm);
                        }
                    }

                    ipage.InfoPrize = IP;
                    dIP            += IP;

                    ipage.Lemmas    = lemmas.Count;
                    ipage.AllLemmas = lemmas.toCsvInLine();

                    dWords.AddRange(terms);
                    dLemmas.AddRange(lemmas);

                    ipage.Note = "indexUpdate" + SessionID;

                    imbWEMManager.index.pageIndexTable.AddOrUpdate(ipage);
                }
                else
                {
                    dIP    += ipage.InfoPrize;
                    doEvalD = false;
                }

                urls.Add(ipage.url);

                p++;

                // Guarded like the domain summary line below; previously a null loger threw here.
                if (loger != null)
                {
                    loger.AppendLine(string.Format("[{0,25}] [{1,70}] IP[{2,7}] LM[{3,6}]", idomain.domain, ipage.url.toWidthMaximum(60), ipage.InfoPrize.ToString("F4"), ipage.Lemmas.ToString("D5")));
                }

                target.Dispose();
            }

            if (imbWEMManager.settings.indexEngine.plugIn_indexDBUpdater_updateDomainEntry)
            {
                if (!doEvalD)
                {
                    // At least one page was reused: take the lemma count from the cached DLC TF-IDF table.
                    var dlc_tf = imbWEMManager.index.experimentEntry.GetTFIDF_DLC(idomain);
                    int dlc_c  = dlc_tf.Count;

                    idomain.TFIDFcompiled = (dlc_c > 0);
                    idomain.Lemmas        = dlc_c;
                }
                else
                {
                    idomain.Lemmas         = dLemmas.Count;
                    idomain.Words          = dWords.Count;
                    idomain.TFIDFcompiled  = (dLemmas.Count > 0);
                    idomain.DistinctLemmas = dDistinctTerms.toCsvInLine();
                    idomain.AllLemmas      = dLemmas.toCsvInLine();
                    idomain.AllWords       = dWords.toCsvInLine();
                }
                idomain.InfoPrize = dIP;

                var urlAssert = imbWEMManager.index.pageIndexTable.GetUrlAssertion(urls);

                idomain.relevantPages        = urlAssert[indexPageEvaluationEntryState.isRelevant].Count;
                idomain.notRelevantPages     = urlAssert[indexPageEvaluationEntryState.notRelevant].Count;
                idomain.detected             = urlAssert[indexPageEvaluationEntryState.haveNoEvaluationEntry].Count;
                idomain.Crawled              = urlAssert.certainty;
                idomain.RelevantContentRatio = urlAssert.relevant;

                string rpp = string.Format("[{0,25}] Pages [{1,10}] IP[{2,10}] LM[{3,10}]", idomain.domain, p, idomain.InfoPrize.ToString("F5"), idomain.Lemmas.ToString("D7"));
                if (loger != null)
                {
                    loger.AppendLine(rpp);
                }

                imbWEMManager.index.domainIndexTable.AddOrUpdate(idomain);
            }

            imbWEMManager.index.wRecordsDeployed++;

            __wRecord.Dispose();
        }
 /// <summary>
 /// Creates a file-oriented console plugin whose folder is set to the current working directory.
 /// </summary>
 /// <param name="__name">Plugin name, forwarded to the base constructor.</param>
 /// <param name="__help">Help text, forwarded to the base constructor.</param>
 /// <param name="__output">Output log, forwarded to the base constructor.</param>
 public aceConsolePluginForFiles(string __name, string __help = "", builderForLog __output = null) : base(__name, __help, __output)
 {
     string workingDirectory = Directory.GetCurrentDirectory();

     folder = new DirectoryInfo(workingDirectory);
 }
Example #23
0
        //  public string description { get; set; } = "Adjusts the TC_max to reach designated goal";



        //  public bool IsEnabled { get; set; } = true;


        //  public string name { get; set; } = "TC_Control";

        /// <summary>
        /// Creates the plugin log and registers it as a console output under the plugin name.
        /// </summary>
        protected enginePlugIn_base()
        {
            this.loger = new builderForLog();

            // NOTE(review): name is read during construction - confirm it already has its intended value here.
            aceLog.consoleControl.setAsOutput(this.loger, this.name);
        }
Example #24
0
        /// <summary>
        /// Sets the execution context: stores the collaborators, creates the experiment folder and
        /// notes, writes the experiment header into the notes, and builds the validation cases for
        /// every model in the setup.
        /// </summary>
        /// <param name="_manager">The manager; its folder is the parent of the experiment folder.</param>
        /// <param name="_setup">The setup; supplies the experiment name, description, models, validation settings and the master extractor.</param>
        /// <param name="_tools">The tools; its <c>context</c> is set to this instance.</param>
        /// <param name="_classes">The classes; assigned to every model and used to build the validation cases.</param>
        /// <param name="sufix">The sufix, combined with the setup name to form the experiment context name.</param>
        /// <param name="chunker">The chunker; its self-description is written into the notes.</param>
        /// <param name="_masterExtractor">The master extractor. Not used by the current body: the master extractor is taken from the first item of <c>_setup.featureVectorExtractors_semantic</c>.</param>
        /// <param name="_logger">The logger; when null, a new <c>builderForLog</c> is created and registered as console output under the setup name.</param>
        public void SetExecutionContext(experimentManager _manager, experimentSetup _setup, classifierTools _tools, DocumentSetClasses _classes, String sufix, chunkComposerBasic chunker, semanticFVExtractor _masterExtractor, ILogBuilder _logger = null)
        {
            // Fall back to a fresh console-registered log when the caller supplied none.
            if (_logger == null)
            {
                _logger = new builderForLog();
                aceLog.consoleControl.setAsOutput(_logger, _setup.name);
            }
            logger        = _logger;
            chunkComposer = chunker;
            setup         = _setup;
            tools         = _tools;
            tools.context = this;
            classes       = _classes;
            // masterConstructor = _masterExtractor.termTableConstructor;



            // The master extractor comes from the setup, not from the _masterExtractor argument.
            // NOTE(review): First() presumably throws when the setup has no semantic extractors - confirm.
            masterExtractor   = _setup.featureVectorExtractors_semantic.First();
            masterConstructor = masterExtractor.termTableConstructor;
            manager           = _manager;
            // NOTE(review): presumably add() joins the setup name and sufix with "_" - confirm against the helper.
            String expContextName = "exp_" + setup.name.add(sufix, "_");

            folder           = manager.folder.Add(expContextName, "Experiment " + setup.name, "Directory with all information on the experiment [" + setup.name + "]");
            errorNotesFolder = folder.Add("errors", "Error logs", "Directory with error reports produced if an exception occours. Normally, if everything was ok this folder should have only two files inside: directory_readme.txt and empty: note.txt).");
            errorNotes       = new experimentNotes(errorNotesFolder, "Notes (logs) about critical and non-critical errors that happen during experiment execution. If everything was ok - this file should remain empty");

            // Experiment notes are also registered as console output under the name "Notes".
            notes = new experimentNotes(folder, "Notes on experiment setup and execution log");
            aceLog.consoleControl.setAsOutput(notes, "Notes");

            notes.log("Experiment [" + expContextName + "] initiated");
            notes.AppendLine("About: " + setup.description);

            notes.AppendHorizontalLine();



            // SaveNote() is called here, before the feature-extraction section below is appended.
            notes.SaveNote();
            notes.AppendHeading("Feature extraction models");

            var lnsc = chunkComposer.DescribeSelf();

            lnsc.ForEach(x => notes.AppendLine(x));
            notes.AppendLine(" - ");


            // First pass over the models: a model whose name was already seen gets "_" plus the number
            // of distinct names seen so far appended. The renamed name itself is not added to the list.
            List <String> mdn = new List <string>();

            foreach (var md in setup.models)
            {
                if (mdn.Contains(md.name))
                {
                    md.name += "_" + mdn.Count.ToString();
                }
                else
                {
                    mdn.Add(md.name);
                }
            }

            // Second pass: prepare each model, describe it in the notes and build its validation cases.
            foreach (var md in setup.models)
            {
                String prefix = md.name;
                md.classes = classes;
                md.BuildFeatureVectorDefinition();

                var lns = md.DescribeSelf();
                lns.ForEach(x => notes.AppendLine(x));



                // The validation cases are built with the model name as prefix and k / randomize taken from the setup.
                kFoldValidationCollection validationCases = classes.BuildValidationCases(prefix, setup.validationSetup.k, tools.DoDebug, logger, folder, setup.validationSetup.randomize);
                validationCases.pipelineCollection = pipelineCollection;

                validationCases.connectContext(this, md);

                // Registered under the model name.
                validationCollections.Add(md.name, validationCases);


                //md.postClassifiers = setup.classifiers;
            }
        }
Example #25
0
        /// <summary>
        /// Creates a fresh log builder, stores it as the output of this instance and registers it as console output under <c>consoleTitle</c>.
        /// </summary>
        protected void prepare()
        {
            var freshOutput = new builderForLog();
            _output = freshOutput;

            aceLog.consoleControl.setAsOutput(freshOutput, consoleTitle);
        }
        /// <summary>
        /// Gets the compiled DLC TF-IDF table for the domain of the given site record: the cached file is loaded when allowed,
        /// otherwise the table is constructed (heuristically or from the relevant loaded pages) and optionally saved to the cache.
        /// </summary>
        /// <param name="__wRecord">The site record; supplies the domain, the loaded targets and the fallback evaluator.</param>
        /// <param name="loger">The log builder receiving progress messages.</param>
        /// <param name="__useExisting">if set to <c>true</c> an existing cache file is loaded and returned immediately.</param>
        /// <param name="__saveToCache">if set to <c>true</c> the resulting table is written over the cache file.</param>
        /// <param name="evaluator">The evaluator; when <c>null</c> the evaluator of the record's test record is used.</param>
        /// <returns>The compiled table, either loaded from the cache or newly constructed.</returns>
        public weightTableCompiled GetOrCreateTFIDF_DLC(modelSpiderSiteRecord __wRecord, builderForLog loger, bool __useExisting, bool __saveToCache, multiLanguageEvaluator evaluator = null)
        {
            indexDomain domainEntry = imbWEMManager.index.domainIndexTable.GetOrCreate(__wRecord.domain);
            FileInfo    cacheFile   = GetTFIDF_DLC_File(domainEntry, getWritableFileMode.existing);

            // Fast path: the cache file exists and the caller accepts it.
            if (cacheFile.Exists && __useExisting)
            {
                weightTableCompiled cached = new weightTableCompiled(cacheFile.FullName, true, domainEntry.domain + "_DLC_TF_IDF");

                loger.log("DLC TF-IDF[" + cached.Count + "] cache found for: " + domainEntry.domain);
                return cached;
            }

            if (evaluator == null)
            {
                evaluator = __wRecord.tRecord.evaluator;
            }

            weightTableCompiled compiled;

            if (!imbWEMManager.settings.TFIDF.doUseHeuristicDLCTFIDFConstruction)
            {
                loger.log("DLC TF-IDF construction for: " + domainEntry.domain + " initiated.");

                termDocumentSet pageTables = new termDocumentSet("DomainTFIDF_source");

                var loadedTargets = __wRecord.context.targets.GetLoaded();
                int totalTargets  = loadedTargets.Count;
                int visited       = 0;   // targets seen so far
                int sinceReport   = 0;   // targets seen since the last console report
                int tokensIn      = 0;   // terms extracted from the relevant pages
                int tokensOut     = 0;   // entries held by the per-page tables

                foreach (spiderTarget target in loadedTargets)
                {
                    visited++;
                    sinceReport++;
                    double progress = visited.GetRatio(totalTargets);

                    if (target.IsRelevant)
                    {
                        var pageTerms = GetTermsForPage(target, domainEntry, null, evaluator, loger);
                        tokensIn += pageTerms.Count;

                        termDocument pageTable = pageTables.AddTable(target.pageHash) as termDocument;
                        pageTable.expansion = 1;
                        pageTable.AddTokens(pageTerms, loger);

                        tokensOut += pageTable.Count();
                    }

                    // Report to the console only after more than ten targets since the previous report.
                    if (sinceReport <= 10)
                    {
                        continue;
                    }

                    sinceReport = 0;
                    double compression = tokensOut.GetRatio(tokensIn);
                    aceLog.consoleControl.writeToConsole("Pages processed [" + progress.ToString("P2") + "] Semantic compression rate: " + compression.ToString("P2"), loger, false, 0);
                }

                loger.log("[" + domainEntry.domain + "] preprocess finished. DLC TF-IDF terms [" + pageTables.CountAllDocuments() + "]");

                compiled      = pageTables.AggregateDocument.GetCompiledTable(loger);
                compiled.name = "DLC-TFIDF " + domainEntry.domain;
            }
            else
            {
                compiled = GetOrCreateTFIDF_DLC_Heuristic(__wRecord, loger, __useExisting, __saveToCache, evaluator);
            }

            domainEntry.Lemmas = compiled.Count;

            if (__saveToCache)
            {
                bool saved = compiled.SaveAs(cacheFile.FullName, getWritableFileMode.overwrite);

                loger.log(saved
                    ? "[" + domainEntry.domain + "] DLC TF-IDF compiled table cache saved to: " + cacheFile.FullName
                    : "[" + domainEntry.domain + "] DLC TF-IDF compiled table save failed");
            }

            imbWEMManager.index.domainIndexTable.AddOrUpdate(domainEntry);

            return compiled;
        }
Example #27
0
 /// <summary>
 /// Replaces the shared terminal log builder with the given instance.
 /// </summary>
 /// <param name="__terminal">The log builder to store as the terminal.</param>
 public static void setTerminal(builderForLog __terminal) => _terminal = __terminal;
Example #28
0
        /// <summary>
        /// Fills the identification fields of an index performance entry (session, repository, start time, setup hashes)
        /// and then requests the domains from the domain index table.
        /// </summary>
        /// <param name="indexSessionEntry">The entry whose fields are populated.</param>
        /// <param name="loger">Not used by this method.</param>
        /// <param name="crawlId">Not used by this method; the entry always receives the fixed "[Index Evaluation]" crawl ID.</param>
        public void evaluateIndexPerformance(indexPerformanceEntry indexSessionEntry, builderForLog loger, string crawlId)
        {
            var indexManager = imbWEMManager.index;

            indexSessionEntry.SessionID       = indexManager.experimentManager.SessionID;
            indexSessionEntry.CrawlID         = "[Index Evaluation]";
            indexSessionEntry.IndexRepository = indexManager.current_indexID;
            indexSessionEntry.Start           = DateTime.Now;

            var consoleState = analyticConsole.mainAnalyticConsole.state;

            indexSessionEntry.CrawlerHash     = consoleState.setupHash_crawler;
            indexSessionEntry.GlobalSetupHash = consoleState.setupHash_global;

            // The result is discarded; the domain, page and evaluated-page counters of the entry are not populated here.
            indexManager.domainIndexTable.GetDomains(indexDomainContentEnum.any);
        }
        /// <summary>
        /// Performs feature selection over the space model of the context and stores the outcome in <c>context.SelectedFeatures</c>.
        /// When selection is disabled or yields nothing, all tokens of the space model are accepted with weight 1.
        /// </summary>
        /// <param name="context">The operation context; its space model is read and filtered, and its <c>SelectedFeatures</c> is replaced.</param>
        /// <param name="log">The log builder receiving progress messages.</param>
        /// <param name="EnableSelection">Whether selection is requested; it is forced off when the filter, its weight model or the model's global factors are missing.</param>
        public void FeatureSelection(OperationContext context, ILogBuilder log, Boolean EnableSelection = true)
        {
            // Logged before the checks below, so the message shows the value requested by the caller.
            log.log("Feature selection [" + EnableSelection.ToString() + "]");

            // Selection needs a filter with a weight model that has at least one global factor.
            if (filter == null || filter.WeightModel == null || !filter.WeightModel.GlobalFactors.Any())
            {
                EnableSelection = false;
            }

            if (!context.spaceModel.IsModelReady)
            {
                log.log("-- Feature selection function shouldn't be called before creation of the space model.");
            }

            List <KeyValuePair <string, double> > filter_result = new List <KeyValuePair <string, double> >();

            if (EnableSelection)
            {
                filter_result = filter.SelectFeatures(context.spaceModel, log, notes.folder);
            }

            builderForLog textBuilder = new builderForLog();

            // The filter may be missing (see the check above), so it is described only when present.
            if (filter != null)
            {
                filter.Describe(textBuilder);
            }

            context.SelectedFeatures = new WeightDictionary("FS_" + context.name, "Features selected by " + (filter != null ? filter.GetSignature() : "[no filter]") + ". Info: " + textBuilder.GetContent());

            if (filter_result.Any())
            {
                foreach (var pair in filter_result)
                {
                    context.SelectedFeatures.AddEntry(pair.Key, pair.Value);
                }
            }
            else
            {
                String msg = "-- Feature selection function returned zero set. All features [" + context.spaceModel.terms_known_label.Count + "] are therefore accepted as selected.";

                context.SelectedFeatures.description += msg;
                log.log(msg);
                var tkns = context.spaceModel.GetTokens(true, false);
                foreach (var tkn in tkns)
                {
                    context.SelectedFeatures.AddEntry(tkn, 1);
                }
            }

            // With fewer selected features than known terms the whole space model is filtered;
            // otherwise only the unknown-label terms are filtered.
            if (context.SelectedFeatures.Count < context.spaceModel.terms_known_label.Count)
            {
                context.spaceModel.FilterSpaceModelFeatures(context.SelectedFeatures, log);
            }
            else
            {
                context.spaceModel.terms_unknown_label.FilterTokens(context.SelectedFeatures.GetKeys());
            }

            if (filter != null && filter.WeightModel != null)
            {
                filter.WeightModel.Dispose();
            }
        }
Example #30
0
        /// <summary>
        /// The stage two exploration: loads the term models of the lemma and, for each of them, collects synonyms
        /// (Apertium, then Wordnet via Apertium), post-processes the model and optionally saves it.
        /// </summary>
        /// <param name="lemma">The lemma whose term models are loaded and explored.</param>
        /// <param name="response">Not used by this method; every model logs into its own temporary log builder.</param>
        /// <param name="savemodel">if set to <c>true</c> the concept graph description is saved to the logs folder and each model is persisted, or added to <c>failedModels</c> of the result when its exploration failed.</param>
        /// <param name="debug">if set to <c>true</c> the model and its log are written to a <c>_log.md</c> file in the logs folder.</param>
        /// <param name="verbose">if set to <c>true</c> the per-model log is attached to the console output while the model is processed.</param>
        /// <param name="task">Optional task; when given, its title is stored as <c>lastModifiedByStage</c> of saved models.</param>
        /// <returns>The loaded model set; when no models were loaded, the lemma is added to its <c>missingLemmas</c>.</returns>
        public static termExploreModelSet exploreStageTwo(string lemma, ILogBuilder response, bool savemodel, bool debug, bool verbose, lexiconTaskBase task = null)
        {
            lexiconConstructor  constructor = semanticLexiconManager.manager.constructor;
            termExploreModelSet outset      = semanticLexiconManager.manager.constructor.loadTermModels(lemma, true);

            if (!Enumerable.Any(outset))
            {
                outset.missingLemmas.Add(lemma);
                return(outset);
            }

            foreach (termExploreModel mod in outset)
            {
                builderForLog    logout = new builderForLog();
                termExploreModel model;

                if (verbose)
                {
                    aceLog.consoleControl.setAsOutput(logout, "stage2");
                }

                try
                {
                    model = getSynonymsWithApertium(mod, logout);

                    // Taken before the Wordnet step, as in the original sequence.
                    string pt = model.lemma.gramSet.getPosType().ToString();

                    model = getSynonymsWithWordnetViaApertium(model, logout, true, false);

                    if (savemodel)
                    {
                        model.graph.saveDescription(constructor.projectFolderStructure[lexiconConstructorProjectFolder.logs].path, pt + "_concepts");
                    }

                    model.PostProcess();

                    if (debug)
                    {
                        model.ToString(logout, true, true);
                        string fn = model.lemma.inputForm + "_" + pt + "_log.md";
                        logout.ToString(false).saveStringToFile(constructor.projectFolderStructure[lexiconConstructorProjectFolder.logs].pathFor(fn), getWritableFileMode.overwrite);
                    }
                }
                finally
                {
                    // Always detach the temporary log, so an exception above does not leave it registered as console output.
                    if (verbose)
                    {
                        aceLog.consoleControl.removeFromOutput(logout);
                    }
                }

                if (savemodel)
                {
                    if (task != null)
                    {
                        model.lastModifiedByStage = task.taskTitle;
                    }
                    else
                    {
                        model.lastModifiedByStage = "stageTwo-exploreProcedure";
                    }

                    if (!model.wasExploreFailed)
                    {
                        constructor.saveTermModel(model);
                    }
                    else
                    {
                        outset.failedModels.Add(model);
                    }
                }
            }
            return(outset);
        }