/// <summary>
/// Initializes the knowledge library for the given validation collection.
/// </summary>
/// <remarks>
/// When the experiment setup flag <c>doShareTheCaseKnowledgeAmongFVEModels</c> is set, the shared-knowledge
/// folder is created under the experiment folder and <c>DoShareCaseKnowledge</c> is switched on.
/// Regardless of that flag, an empty knowledge dictionary is registered for every validation case.
/// </remarks>
/// <param name="_parent">The validation collection this library belongs to.</param>
public WebFVExtractorKnowledgeLibrary(kFoldValidationCollection _parent)
{
    validationCollection = _parent;

    var context = validationCollection.context;
    bool shareAmongModels = context.setup.doShareTheCaseKnowledgeAmongFVEModels;

    if (shareAmongModels)
    {
        context.notes.log("::: CASE KNOWLEDGE SHARE MODE IS ENABLED ::: MAKE SURE FVEs IN THE EXPERIMENT ARE DESCRIBING THE CASES ON THE SAME WAY :::");

        ExperimentRootFolder = context.folder;
        ExperimentSharedCasesFolder = ExperimentRootFolder.Add(
            "SharedKnowledge",
            "Shared Knowledge on cases",
            "Directory with DocumentSetCase knowledge, shared among different FVE models in this experiment. The share option is set at experiment setup XML.");

        // Sharing only takes effect when existing knowledge is reused, so warn (log + beep) if it is not.
        bool reuseExistingKnowledge = context.tools.operation.doUseExistingKnowledge;
        if (reuseExistingKnowledge == false)
        {
            context.notes.log(" > doUseExistingKnowledge is FALSE --- to have the case knowledge sharing used, you have to set it to TRUE ------");
            imbACE.Services.terminal.aceTerminalInput.doBeepViaConsole(2000, 1000, 2);
        }

        DoShareCaseKnowledge = true;
    }

    // One (initially empty) knowledge dictionary per validation case.
    foreach (var validationCase in validationCollection.GetCases())
    {
        var caseKnowledge = new ConcurrentDictionary<string, IWebFVExtractorKnowledge>();
        registry.Add(validationCase, caseKnowledge);
    }
}
/// <summary>
/// Sets the execution context: stores the supplied components, creates the experiment and error folders,
/// starts the experiment notes and builds one k-fold validation collection per model in the setup.
/// </summary>
/// <param name="_manager">The manager; its <c>folder</c> is the parent of the experiment folder.</param>
/// <param name="_setup">The setup.</param>
/// <param name="_tools">The tools; their <c>context</c> is set to this instance.</param>
/// <param name="_classes">The classes.</param>
/// <param name="sufix">The sufix, appended to the setup name when composing the experiment folder name.</param>
/// <param name="chunker">The chunker.</param>
/// <param name="_masterExtractor">The master extractor. Currently not used: the master extractor is taken from the first semantic feature vector extractor of <paramref name="_setup"/>.</param>
/// <param name="_logger">The logger; when <c>null</c> a new <c>builderForLog</c> is created and set as console output.</param>
public void SetExecutionContext(experimentManager _manager, experimentSetup _setup, classifierTools _tools, DocumentSetClasses _classes, String sufix, chunkComposerBasic chunker, semanticFVExtractor _masterExtractor, ILogBuilder _logger = null)
{
    if (_logger == null)
    {
        _logger = new builderForLog();
        aceLog.consoleControl.setAsOutput(_logger, _setup.name);
    }

    logger = _logger;
    chunkComposer = chunker;
    setup = _setup;
    tools = _tools;
    tools.context = this;
    classes = _classes;

    // The _masterExtractor argument is intentionally ignored here; the first semantic extractor of the setup is used.
    masterExtractor = _setup.featureVectorExtractors_semantic.First();
    masterConstructor = masterExtractor.termTableConstructor;
    manager = _manager;

    String expContextName = "exp_" + setup.name.add(sufix, "_");
    folder = manager.folder.Add(expContextName, "Experiment " + setup.name, "Directory with all information on the experiment [" + setup.name + "]");
    errorNotesFolder = folder.Add("errors", "Error logs", "Directory with error reports produced if an exception occours. Normally, if everything was ok this folder should have only two files inside: directory_readme.txt and empty: note.txt).");
    errorNotes = new experimentNotes(errorNotesFolder, "Notes (logs) about critical and non-critical errors that happen during experiment execution. If everything was ok - this file should remain empty");

    notes = new experimentNotes(folder, "Notes on experiment setup and execution log");
    aceLog.consoleControl.setAsOutput(notes, "Notes");
    notes.log("Experiment [" + expContextName + "] initiated");
    notes.AppendLine("About: " + setup.description);
    notes.AppendHorizontalLine();
    notes.SaveNote();

    notes.AppendHeading("Feature extraction models");
    var lnsc = chunkComposer.DescribeSelf();
    lnsc.ForEach(x => notes.AppendLine(x));
    notes.AppendLine(" - ");

    // Make model names unique, because each name is used below as the key of validationCollections.
    // Every assigned name (including renamed ones) is tracked, so a third model with the same
    // name cannot be renamed to a value that was already handed out.
    HashSet<String> usedNames = new HashSet<String>();
    foreach (var md in setup.models)
    {
        if (usedNames.Add(md.name))
        {
            continue;
        }

        String baseName = md.name;
        Int32 suffixIndex = usedNames.Count;
        String candidate = baseName + "_" + suffixIndex.ToString();
        while (!usedNames.Add(candidate))
        {
            suffixIndex++;
            candidate = baseName + "_" + suffixIndex.ToString();
        }

        md.name = candidate;
    }

    foreach (var md in setup.models)
    {
        String prefix = md.name;
        md.classes = classes;
        md.BuildFeatureVectorDefinition();

        var lns = md.DescribeSelf();
        lns.ForEach(x => notes.AppendLine(x));

        kFoldValidationCollection validationCases = classes.BuildValidationCases(prefix, setup.validationSetup.k, tools.DoDebug, logger, folder, setup.validationSetup.randomize);
        validationCases.pipelineCollection = pipelineCollection;
        validationCases.connectContext(this, md);
        validationCollections.Add(md.name, validationCases);
    }
}
/// <summary>
/// Builds the k-fold validation cases for all classes returned by <c>GetClasses()</c>.
/// </summary>
/// <param name="basename">Name of the folder created for the collection; also the prefix of each fold name (the fold index is appended as three digits).</param>
/// <param name="k">Number of folds to create; also used as the number of sampling parts.</param>
/// <param name="debug">if set to <c>true</c>, the per-class training and evaluation counts are appended to <paramref name="output"/>.</param>
/// <param name="output">Optional log builder; when <c>null</c>, the log calls made directly by this method are skipped.</param>
/// <param name="folderOverride">Optional parent folder, used instead of <c>folderRoot</c> when not <c>null</c>.</param>
/// <param name="randomize">if set to <c>true</c>, <c>Randomize()</c> is called on the sample list of every class before the folds are created.</param>
/// <returns>The validation collection holding the created folds.</returns>
public kFoldValidationCollection BuildValidationCases(String basename, Int32 k, Boolean debug, ILogBuilder output = null, folderNode folderOverride = null, Boolean randomize = false)
{
    kFoldValidationCollection collection = new kFoldValidationCollection();

    folderNode parentFolder = folderRoot;
    if (folderOverride != null)
    {
        parentFolder = folderOverride;
    }

    collection.folder = parentFolder.Add(basename, basename, basename + " " + k + "-fold validation");
    collection.Clear();

    var documentClasses = GetClasses();

    // Snapshot the web site sample of each class; the folds below are all taken from these lists.
    collection.sampleMatrix = new Dictionary<IDocumentSetClass, List<string>>();
    foreach (IDocumentSetClass documentClass in documentClasses)
    {
        List<string> classSample = documentClass.WebSiteSample.ToList();
        collection.sampleMatrix.Add(documentClass, classSample);
    }

    samplingSettings sampling = new samplingSettings
    {
        parts = k,
        takeOrder = samplingOrderEnum.ordinal
    };

    if (randomize)
    {
        // NOTE(review): presumably shuffles the stored list in place - confirm against the Randomize() extension.
        foreach (IDocumentSetClass documentClass in documentClasses)
        {
            collection.sampleMatrix[documentClass].Randomize();
        }
    }

    for (int foldIndex = 0; foldIndex < k; foldIndex++)
    {
        var fold = collection.CreateNew(basename + foldIndex.ToString("D3"));

        foreach (IDocumentSetClass documentClass in documentClasses)
        {
            List<String> fullSample = collection.sampleMatrix[documentClass].ToList();

            if (k <= 1)
            {
                // Single fold: the same sample is used for both training and evaluation.
                fold.trainingCases.Add(documentClass.name, fullSample);
                fold.evaluationCases.Add(documentClass.name, fullSample);
            }
            else
            {
                // The fold index selects which part is taken for evaluation; the rest becomes training.
                sampling.skip = foldIndex;
                var evaluationTake = new sampleTake<String>(fullSample, sampling);
                List<String> trainingSample = evaluationTake.GetRestOfSource();

                fold.trainingCases.Add(documentClass.name, trainingSample);
                fold.evaluationCases.Add(documentClass.name, evaluationTake);
            }

            if (output != null && debug)
            {
                output.AppendLine("Case [" + fold.name + "] for [" + documentClass.name + "] have training[" + fold.trainingCases[documentClass.name].Count + "] and eval[" + fold.evaluationCases[documentClass.name].Count + "]");
            }
        }

        if (output != null)
        {
            output.log("k-fold validation case [" + fold.name + "] created for [" + fold.trainingCases.Count + "] industries");
        }
    }

    collection.OnBeforeSave(output);
    return collection;
}