Exemple #1
0
        /// <summary>
        /// Creates the knowledge library for a k-fold validation collection and prepares an empty
        /// knowledge dictionary for every validation case in it.
        /// </summary>
        /// <remarks>
        /// When the experiment setup enables case knowledge sharing, the shared knowledge folder is
        /// created under the experiment folder and <c>DoShareCaseKnowledge</c> is switched on. A warning
        /// (log line and console beep) is issued if <c>doUseExistingKnowledge</c> is off.
        /// </remarks>
        /// <param name="_parent">The validation collection this library belongs to.</param>
        public WebFVExtractorKnowledgeLibrary(kFoldValidationCollection _parent)
        {
            validationCollection = _parent;

            var experiment = validationCollection.context;

            if (experiment.setup.doShareTheCaseKnowledgeAmongFVEModels)
            {
                experiment.notes.log("::: CASE KNOWLEDGE SHARE MODE IS ENABLED ::: MAKE SURE FVEs IN THE EXPERIMENT ARE DESCRIBING THE CASES ON THE SAME WAY :::");

                ExperimentRootFolder = experiment.folder;
                ExperimentSharedCasesFolder = ExperimentRootFolder.Add(
                    "SharedKnowledge",
                    "Shared Knowledge on cases",
                    "Directory with DocumentSetCase knowledge, shared among different FVE models in this experiment. The share option is set at experiment setup XML.");

                // Sharing is enabled but existing knowledge is not being used: warn the operator.
                if (!experiment.tools.operation.doUseExistingKnowledge)
                {
                    experiment.notes.log(" > doUseExistingKnowledge is FALSE --- to have the case knowledge sharing used, you have to set it to TRUE ------");
                    imbACE.Services.terminal.aceTerminalInput.doBeepViaConsole(2000, 1000, 2);
                }

                DoShareCaseKnowledge = true;
            }

            // One knowledge dictionary per validation case, regardless of the sharing mode.
            foreach (var validationCase in validationCollection.GetCases())
            {
                var caseKnowledge = new ConcurrentDictionary<string, IWebFVExtractorKnowledge>();
                registry.Add(validationCase, caseKnowledge);
            }
        }
Exemple #2
0
        /// <summary>
        /// Sets the execution context: stores the supplied components, creates the experiment folders
        /// and notes, makes the model names unique and builds one k-fold validation collection per model.
        /// </summary>
        /// <param name="_manager">The manager; its folder is the parent of the experiment folder.</param>
        /// <param name="_setup">The setup describing the experiment (name, description, models, validation setup).</param>
        /// <param name="_tools">The tools; their <c>context</c> is set to this instance.</param>
        /// <param name="_classes">The classes used to build the validation cases for each model.</param>
        /// <param name="sufix">The sufix appended to the experiment context name.</param>
        /// <param name="chunker">The chunker; its self-description is written to the notes.</param>
        /// <param name="_masterExtractor">
        /// Currently ignored: the master extractor is taken from the first entry of
        /// <c>_setup.featureVectorExtractors_semantic</c>.
        /// </param>
        /// <param name="_logger">
        /// The logger. When <c>null</c>, a new <c>builderForLog</c> is created and registered as console
        /// output under the setup name.
        /// </param>
        public void SetExecutionContext(experimentManager _manager, experimentSetup _setup, classifierTools _tools, DocumentSetClasses _classes, String sufix, chunkComposerBasic chunker, semanticFVExtractor _masterExtractor, ILogBuilder _logger = null)
        {
            if (_logger == null)
            {
                _logger = new builderForLog();
                aceLog.consoleControl.setAsOutput(_logger, _setup.name);
            }
            logger        = _logger;
            chunkComposer = chunker;
            setup         = _setup;
            tools         = _tools;
            tools.context = this;
            classes       = _classes;

            // The _masterExtractor argument is not used; the first semantic extractor of the setup is the master.
            masterExtractor   = _setup.featureVectorExtractors_semantic.First();
            masterConstructor = masterExtractor.termTableConstructor;
            manager           = _manager;
            String expContextName = "exp_" + setup.name.add(sufix, "_");

            folder           = manager.folder.Add(expContextName, "Experiment " + setup.name, "Directory with all information on the experiment [" + setup.name + "]");
            errorNotesFolder = folder.Add("errors", "Error logs", "Directory with error reports produced if an exception occours. Normally, if everything was ok this folder should have only two files inside: directory_readme.txt and empty: note.txt).");
            errorNotes       = new experimentNotes(errorNotesFolder, "Notes (logs) about critical and non-critical errors that happen during experiment execution. If everything was ok - this file should remain empty");

            notes = new experimentNotes(folder, "Notes on experiment setup and execution log");
            aceLog.consoleControl.setAsOutput(notes, "Notes");

            notes.log("Experiment [" + expContextName + "] initiated");
            notes.AppendLine("About: " + setup.description);

            notes.AppendHorizontalLine();

            notes.SaveNote();
            notes.AppendHeading("Feature extraction models");

            var lnsc = chunkComposer.DescribeSelf();

            lnsc.ForEach(x => notes.AppendLine(x));
            notes.AppendLine(" - ");

            // Make model names unique. They are used below as folder base names and as keys of
            // validationCollections, so two models must never end up with the same name.
            // Every name handed out (original or renamed) is tracked, and the numeric suffix is
            // bumped until the candidate is free. The first candidate uses the number of names
            // accepted unchanged so far, which matches the previous naming for the simple case.
            HashSet<String> assignedNames = new HashSet<String>();
            Int32 acceptedUnchanged = 0;

            foreach (var md in setup.models)
            {
                if (assignedNames.Add(md.name))
                {
                    acceptedUnchanged++;
                    continue;
                }

                Int32 suffix = acceptedUnchanged;
                String candidate = md.name + "_" + suffix.ToString();

                while (!assignedNames.Add(candidate))
                {
                    suffix++;
                    candidate = md.name + "_" + suffix.ToString();
                }

                md.name = candidate;
            }

            foreach (var md in setup.models)
            {
                String prefix = md.name;
                md.classes = classes;
                md.BuildFeatureVectorDefinition();

                var lns = md.DescribeSelf();
                lns.ForEach(x => notes.AppendLine(x));

                // One validation collection per model, stored in a sub-folder named after the model.
                kFoldValidationCollection validationCases = classes.BuildValidationCases(prefix, setup.validationSetup.k, tools.DoDebug, logger, folder, setup.validationSetup.randomize);
                validationCases.pipelineCollection = pipelineCollection;

                validationCases.connectContext(this, md);

                validationCollections.Add(md.name, validationCases);
            }
        }
Exemple #3
0
        /// <summary>
        /// Builds a k-fold validation collection: one validation case per fold, each holding training
        /// and evaluation samples for every class returned by <c>GetClasses()</c>.
        /// </summary>
        /// <param name="basename">Name of the collection folder and prefix of each case name (followed by a three-digit fold index).</param>
        /// <param name="k">Number of folds. When it is 1, the complete sample is used for both training and evaluation.</param>
        /// <param name="debug">if set to <c>true</c>, per-class training/evaluation counts are written to <paramref name="output"/>.</param>
        /// <param name="output">Optional log output; nothing is logged when <c>null</c>.</param>
        /// <param name="folderOverride">Optional parent folder used instead of <c>folderRoot</c>.</param>
        /// <param name="randomize">if set to <c>true</c>, the sample list of each class is randomized once, before the folds are built.</param>
        /// <returns>The populated validation collection.</returns>
        public kFoldValidationCollection BuildValidationCases(String basename, Int32 k, Boolean debug, ILogBuilder output = null, folderNode folderOverride = null, Boolean randomize = false)
        {
            var collection = new kFoldValidationCollection();

            // The override, when supplied, wins over the default root folder.
            folderNode defaultFolder = folderRoot;
            folderNode parentFolder = folderOverride ?? defaultFolder;

            collection.folder = parentFolder.Add(basename, basename, basename + " " + k + "-fold validation");
            collection.Clear();

            var documentClasses = GetClasses();

            // Snapshot of the web site sample of every class; all folds are cut from these lists.
            collection.sampleMatrix = new Dictionary<IDocumentSetClass, List<string>>();
            foreach (IDocumentSetClass documentClass in documentClasses)
            {
                List<string> snapshot = documentClass.WebSiteSample.ToList();
                collection.sampleMatrix.Add(documentClass, snapshot);
            }

            samplingSettings sampling = new samplingSettings();
            sampling.parts     = k;
            sampling.takeOrder = samplingOrderEnum.ordinal;

            if (randomize)
            {
                foreach (IDocumentSetClass documentClass in documentClasses)
                {
                    collection.sampleMatrix[documentClass].Randomize();
                }
            }

            Boolean logDetails = output != null && debug;

            for (int foldIndex = 0; foldIndex < k; foldIndex++)
            {
                var fold = collection.CreateNew(basename + foldIndex.ToString("D3"));

                foreach (IDocumentSetClass documentClass in documentClasses)
                {
                    List<String> classSample = collection.sampleMatrix[documentClass].ToList();

                    if (k <= 1)
                    {
                        // Single fold: the same list serves as training and evaluation set.
                        fold.trainingCases.Add(documentClass.name, classSample);
                        fold.evaluationCases.Add(documentClass.name, classSample);
                    }
                    else
                    {
                        // The sampling settings are shared across folds; only the skip value changes.
                        sampling.skip = foldIndex;

                        var evaluationPart = new sampleTake<String>(classSample, sampling);
                        List<String> trainingPart = evaluationPart.GetRestOfSource();

                        fold.trainingCases.Add(documentClass.name, trainingPart);
                        fold.evaluationCases.Add(documentClass.name, evaluationPart);
                    }

                    if (logDetails)
                    {
                        output.AppendLine("Case [" + fold.name + "] for [" + documentClass.name + "] have training[" + fold.trainingCases[documentClass.name].Count + "] and eval[" + fold.evaluationCases[documentClass.name].Count + "]");
                    }
                }

                if (output != null)
                {
                    output.log("k-fold validation case [" + fold.name + "] created for [" + fold.trainingCases.Count + "] industries");
                }
            }

            collection.OnBeforeSave(output);
            return collection;
        }