/// <summary>
/// Stores the given context under the name of its runtime type, replacing any
/// context that was previously stored under the same name.
/// </summary>
/// <param name="context">The context to store or update.</param>
public void StoreContext(IPlaneContext context)
{
    // A single indexer assignment covers both the "add" and the "update" case,
    // avoiding the former ContainsKey + indexer/Add double lookup.
    // NOTE(review): assumes items is a Dictionary-style collection whose indexer
    // setter inserts missing keys - confirm against the field declaration.
    items[context.GetType().Name] = context;
}
/// <summary>
/// Runs the entity, corpus, vector and feature plane methods in that order, feeding the
/// output context of each plane into the next one, then evaluates the test results,
/// saves the per-class evaluation table and the averaged classification report, and
/// registers the report in <paramref name="generalContext"/>.
/// </summary>
/// <param name="inputContext">The input context, handed to the entity plane method.</param>
/// <param name="generalContext">General execution context; supplies the truth table, run name and averaging method, and receives the test summary.</param>
/// <param name="logger">The logger passed to every plane method and to the evaluation.</param>
/// <returns>The <paramref name="generalContext"/> instance.</returns>
public IPlaneContext ExecutePlaneMethod(IPlaneContext inputContext, ExperimentModelExecutionContext generalContext, ILogBuilder logger)
{
    IFeaturePlaneContext featureContext = null;

    try
    {
        // Each plane consumes the context produced by the previous one.
        ICorpusPlaneContext corpusContext = EntityMethod.ExecutePlaneMethod(inputContext, generalContext, logger) as ICorpusPlaneContext;
        IVectorPlaneContext vectorContext = CorpusMethod.ExecutePlaneMethod(corpusContext, generalContext, logger) as IVectorPlaneContext;
        IFeaturePlaneContext featureSpaceContext = VectorMethod.ExecutePlaneMethod(vectorContext, generalContext, logger) as IFeaturePlaneContext;
        featureContext = FeatureMethod.ExecutePlaneMethod(featureSpaceContext, generalContext, logger) as IFeaturePlaneContext;

        // --- the results reporting
        var evaluationMetrics = generalContext.truthTable.EvaluateTestResultsToMetricSet(featureContext.testResults, generalContext.runName + "-" + notes.folder.name, logger);

        DataTableTypeExtended<classificationEval> inclassEvalTable = new DataTableTypeExtended<classificationEval>("inclass_evaluation", "Test results, per class");
        evaluationMetrics.GetAllEntries().ForEach(x => inclassEvalTable.AddRow(x));
        inclassEvalTable.AddRow(evaluationMetrics.GetSummary("Sum"));
        notes.SaveDataTable(inclassEvalTable, notes.folder_classification);

        classificationReport averagedReport = new classificationReport(evaluationMetrics, generalContext.averagingMethod);
        averagedReport.Classifier = FeatureMethod.classifier.name;
        averagedReport.saveObjectToXML(notes.folder_classification.pathFor(averagedReport.Name + ".xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized classification evaluation results summary"));

        generalContext.testSummaries.Add(averagedReport);
        averagedReport.ReportToLog(notes);
    }
    finally
    {
        // Release the providers even when a plane or the reporting step throws;
        // previously they were disposed only on the success path.
        if (featureContext != null)
        {
            featureContext.provider.Dispose();
        }

        EntityMethod.CacheProvider.Dispose();
    }

    return generalContext;
}
/// <summary>
/// Vector plane: constructs one feature vector per document of the incoming vector space
/// and copies the label-to-document links onto the resulting feature space.
/// </summary>
/// <param name="inputContext">The input context - related to this plane; cast to <c>IVectorPlaneContext</c>.</param>
/// <param name="generalContext">General execution context, attached to the <see cref="T:imbNLP.Toolkit.Planes.PlanesMethodDesign" /></param>
/// <param name="logger">The logger.</param>
/// <returns>
/// The <c>FeaturePlaneContext</c> created by this call, holding the constructed feature space.
/// </returns>
public IPlaneContext ExecutePlaneMethod(IPlaneContext inputContext, ExperimentModelExecutionContext generalContext, ILogBuilder logger)
{
    notes.logStartPhase("[3] Vector Plane - execution", "");

    IVectorPlaneContext vectorPlane = (IVectorPlaneContext)inputContext;

    FeaturePlaneContext result = new FeaturePlaneContext();
    result.provider.StoreAndReceive(vectorPlane);

    ICorpusPlaneContext corpusPlane = result.provider.GetContext<CorpusPlaneContext>();

    // The constructor is deployed twice: once with the vector space,
    // once with the features selected on the corpus plane.
    featureSpaceConstructor.Deploy(constructorSettings, vectorPlane.vectorSpace);
    featureSpaceConstructor.Deploy(constructorSettings, corpusPlane.SelectedFeatures);

    notes.log(":: Constructing feature vectors");

    // Index of the constructed vectors by name, used below to resolve the links.
    Dictionary<string, FeatureVector> vectorsByName = new Dictionary<string, FeatureVector>();

    foreach (IVector sourceVector in vectorPlane.vectorSpace.documents)
    {
        var featureVector = featureSpaceConstructor.ConstructFeatureVector(sourceVector);
        vectorsByName.Add(featureVector.name, featureVector);
        result.featureSpace.documents.Add(featureVector);
    }

    // NodeB of a link names the document, NodeA is passed on as its associated node.
    foreach (var association in vectorPlane.LabelToDocumentLinks.links)
    {
        FeatureVector linkedVector = vectorsByName[association.NodeB.name];
        result.featureSpace.labelToDocumentAssociations.Add(linkedVector, association.NodeA, 1);
    }

    Boolean reportVectors = generalContext.reportOptions.HasFlag(PlanesReportOptions.report_featureVectors);
    if (reportVectors)
    {
        var featureTable = result.featureSpace.MakeTable(featureSpaceConstructor, "FeatureSpace", "Feature space");
        notes.SaveDataTable(featureTable, notes.folder_feature);
    }

    notes.logEndPhase();

    return result;
}
/// <summary>
/// Corpus plane: builds a document model for every text document of the corpus, selects the
/// features, creates a category model per known label and produces the weighted document and
/// category vectors of the output vector space. Optional reports are written according to
/// the report options of <paramref name="generalContext"/>.
/// </summary>
/// <param name="inputContext">The input context - related to this plane; cast to <c>ICorpusPlaneContext</c>.</param>
/// <param name="generalContext">General execution context, attached to the <see cref="T:imbNLP.Toolkit.Planes.PlanesMethodDesign" /></param>
/// <param name="logger">The logger.</param>
/// <returns>
/// The <c>VectorPlaneContext</c> created by this call, holding the vector space and the label-to-document links.
/// </returns>
public IPlaneContext ExecutePlaneMethod(IPlaneContext inputContext, ExperimentModelExecutionContext generalContext, ILogBuilder logger)
{
    notes.logStartPhase("[2] Corpus Plane - execution", "");

    ICorpusPlaneContext context = (ICorpusPlaneContext)inputContext;

    VectorPlaneContext outputContext = new VectorPlaneContext();
    outputContext.provider.StoreAndReceive(context);

    context.stemmContext = new StemmingContext(stemmer);

    Dictionary<String, SpaceDocumentModel> documentVsModel = new Dictionary<string, SpaceDocumentModel>();

    // modelling the documents
    foreach (TextDocument doc in context.corpus_documents)
    {
        SpaceDocumentModel model = spaceConstructor.ConstructDocument(doc.content, doc.name, context.space, context.stemmContext, tokenizer);
        List<SpaceLabel> labels = spaceConstructor.GetLabels(doc.labels, context.space);

        // Stays true when the document carries no label other than UNKNOWN
        // (including the case of no labels at all).
        Boolean isUnknownLabel = true;
        foreach (SpaceLabel label in labels)
        {
            if (label.name != SpaceLabel.UNKNOWN)
            {
                isUnknownLabel = false;
            }

            context.space.LabelToDocumentLinks.Add(label, model, 1);
        }

        context.space.documents.Add(model);

        // Only documents with a known label contribute to the corpus dictionary.
        if (!isUnknownLabel)
        {
            context.space.terms.MergeDictionary(model.terms);
        }

        documentVsModel.Add(doc.name, model);
    }

    if (generalContext.reportOptions.HasFlag(PlanesReportOptions.report_fold_textrender))
    {
        foreach (TextDocument doc in context.corpus_documents)
        {
            String prefix = doc.labels.FirstOrDefault();
            if (prefix.isNullOrEmpty())
            {
                prefix = SpaceLabel.UNKNOWN;
            }

            String fn = prefix + "_" + doc.name;
            String pth = notes.folder_entity.pathFor(fn.getFilename("txt"), imbSCI.Data.enums.getWritableFileMode.overwrite, "Textual representation of website [" + doc.name + "], produced by rendering and blending settings", true);
            doc.content.saveStringToFile(pth, imbSCI.Data.enums.getWritableFileMode.overwrite);
        }
    }

    if (generalContext.reportOptions.HasFlag(PlanesReportOptions.report_fold_stats))
    {
        foreach (WebSiteDocumentsSet ds in context.dataset)
        {
            DataTable dt = ds.MakeTable(documentVsModel);
            notes.SaveDataTable(dt, notes.folder_entity);
        }

        var dt_vsm = context.space.LabelToDocumentLinks.MakeTable("LabelToDocument", "Relationships between labels and documents in the primary Vector Space Model");
        notes.SaveDataTable(dt_vsm, notes.folder_corpus);
    }

    if (generalContext.reportOptions.HasFlag(PlanesReportOptions.report_corpusDictionary))
    {
        notes.SaveDataTable(context.space.terms.MakeTable("corpus_stats", "Training set dictionary, after stemming", generalContext.DictionaryReportLimit), notes.folder_corpus);
    }

    #region SELECTING THE FEATURES
    // forming corpus global weight
    context.SelectedFeatures = new WeightDictionary();

    List<KeyValuePair<string, double>> filter_result = filter.SelectFeatures(context.space);

    // FV is the list of feature names handed to the weight model below.
    List<string> FV = new List<string>();
    FV.AddRange(filter_result.Select(x => x.Key));

    if (filter_result.Any())
    {
        foreach (var pair in filter_result)
        {
            context.SelectedFeatures.AddEntry(pair.Key, pair.Value);
        }

        if (generalContext.reportOptions.HasFlag(PlanesReportOptions.report_selectedFeatures))
        {
            notes.SaveDataTable(context.SelectedFeatures.MakeTable("selected_features", "Features selected for BoW construction", new List<string>() { filter.function.shortName }, generalContext.DictionaryReportLimit), notes.folder_corpus);
        }
    }
    else
    {
        logger.log("-- Feature selection function returned zero set. All features [" + context.space.terms.Count + "] are therefore accepted as selected.");

        var tkns = context.space.terms.GetTokens();
        foreach (var tkn in tkns)
        {
            context.SelectedFeatures.AddEntry(tkn, 1);

            // Keep FV in sync with SelectedFeatures: previously FV stayed empty on this
            // fallback path, so the weight model received no feature names even though
            // every term had just been accepted as selected.
            FV.Add(tkn);
        }
    }
    #endregion

    notes.log("Selected features [" + context.SelectedFeatures.entries.Count + "] by [" + filter.functionSettings.functionName + "]");

    outputContext.vectorSpace = new Vectors.VectorSpace();

    // One category model per label other than UNKNOWN, built from the documents linked to it.
    foreach (SpaceLabel label in context.space.labels)
    {
        var docs = context.space.LabelToDocumentLinks.GetAllLinked(label);

        if (label.name != SpaceLabel.UNKNOWN)
        {
            SpaceCategoryModel categoryModel = new SpaceCategoryModel(label, docs);
            context.space.LabelToCategoryLinks.Add(label, categoryModel, 1);
            context.space.categories.Add(categoryModel);

            notes.log("Class [" + categoryModel.name + "] BoW model created - terms[" + categoryModel.terms.Count + "] ");
        }
    }

    outputContext.LabelToDocumentLinks = context.space.LabelToDocumentLinks;

    // preparing the model
    weightModel.PrepareTheModel(context.space);

    // building document VSM
    foreach (SpaceDocumentModel docModel in context.space.documents)
    {
        var wd = weightModel.GetWeights(FV, docModel, context.space);
        VectorDocument docVec = new VectorDocument(docModel.name);
        docVec.terms = wd;

        if (generalContext.reportOptions.HasFlag(PlanesReportOptions.report_documentBoWModels))
        {
            DataTable dt = wd.MakeTable("docVec_" + docModel.name, "Document vector model", null, 10000);
            notes.SaveDataTable(dt, notes.folder_vector);
        }

        outputContext.vectorSpace.documents.Add(docVec);
    }

    // building category VSM
    foreach (SpaceCategoryModel catModel in context.space.categories)
    {
        var wd = weightModel.GetWeights(FV, catModel, context.space);
        VectorLabel catVec = new VectorLabel(catModel.name);
        catVec.terms = wd;

        if (generalContext.reportOptions.HasFlag(PlanesReportOptions.report_documentBoWModels))
        {
            // NOTE(review): the description below says "Document vector model" although this
            // table describes a category vector - looks like a copy-paste; left unchanged.
            DataTable dt = wd.MakeTable("catVec_" + catModel.name, "Document vector model", null, 10000);
            notes.SaveDataTable(dt, notes.folder_vector);
        }

        outputContext.vectorSpace.labels.Add(catVec);
    }

    if (generalContext.reportOptions.HasFlag(PlanesReportOptions.report_documentBoWModels))
    {
        foreach (SpaceCategoryModel catModel in context.space.categories)
        {
            var dt = catModel.terms.MakeTable("cat_" + catModel.name, "Vector Space BoW weighted model, representing a category");
            notes.SaveDataTable(dt, notes.folder_vector);
        }
    }

    notes.logEndPhase();

    return outputContext;
}
/// <summary>
/// Feature plane: splits the feature vectors into a training set (vectors with an associated
/// label) and a test set (vectors without one), trains the classifier on the training set and
/// stores the classifier's selection for every test vector in <c>testResults</c>.
/// </summary>
/// <param name="inputContext">The input context - related to this plane; expected to be an <c>IFeaturePlaneContext</c>.</param>
/// <param name="generalContext">General execution context, attached to the <see cref="T:imbNLP.Toolkit.Planes.PlanesMethodDesign" /></param>
/// <param name="logger">The logger.</param>
/// <returns>
/// The input context, with its training set, test set and test results populated.
/// </returns>
public IPlaneContext ExecutePlaneMethod(IPlaneContext inputContext, ExperimentModelExecutionContext generalContext, ILogBuilder logger)
{
    notes.logStartPhase("[4] Feature Plane - execution", "");

    IFeaturePlaneContext context = inputContext as IFeaturePlaneContext;

    foreach (FeatureVector vec in context.featureSpace.documents)
    {
        var associated = context.featureSpace.labelToDocumentAssociations.GetAllLinked(vec);

        if (associated.Any())
        {
            // The label ID is the position of the first associated label in the truth table.
            Int32 lbi = generalContext.truthTable.labels_without_unknown.IndexOf(associated.First().name);

            FeatureVectorWithLabelID fvl = new FeatureVectorWithLabelID(vec, lbi);
            context.trainingSet.Add(fvl);
        }
        else
        {
            context.testSet.Add(vec);
        }
    }

    if (!context.testSet.Any())
    {
        notes.log("TEST SET IS EMPTY ---- APPLYING 1:1 EXPERIMENT SHEME: training and test set are the same");
        context.trainingSet.ForEach(x => context.testSet.Add(x.vector));
    }

    if (!context.trainingSet.Any())
    {
        // Nothing can be trained or tested. The former message claimed that the 1:1 scheme
        // was applied here, which this branch never did.
        notes.log("TRAINING SET EMPTY ---- classifier training and testing skipped");

        // Publish an empty result list so that consumers of testResults do not have to
        // deal with an unassigned value when no classification took place.
        context.testResults = new List<FeatureVectorWithLabelID>();
    }
    else
    {
        notes.log("Training [" + classifier.name + "] with [" + context.trainingSet.Count + "] feature vectors.");
        classifier.DoTraining(context.trainingSet, logger);

        notes.log("Testing [" + classifier.name + "] with [" + context.testSet.Count + "] feature vectors.");

        context.testResults = new List<FeatureVectorWithLabelID>();
        foreach (FeatureVector fv in context.testSet)
        {
            Int32 result = classifier.DoSelect(fv, logger);
            FeatureVectorWithLabelID fvl = new FeatureVectorWithLabelID(fv, result);
            context.testResults.Add(fvl);
        }
    }

    notes.logEndPhase();

    return context;
}
/// <summary>
/// Entity plane: renders every web site of the dataset into a text document set (using the
/// cache provider where possible), optionally filters the rendered pages, and blends the
/// result into the labeled text documents of the output corpus context.
/// </summary>
/// <param name="inputContext">The input context - related to this plane; expected to be an <c>IEntityPlaneContext</c>.</param>
/// <param name="generalContext">General execution context, attached to the <see cref="T:imbNLP.Toolkit.Planes.PlanesMethodDesign" /></param>
/// <param name="logger">The logger.</param>
/// <returns>
/// The <c>CorpusPlaneContext</c> created by this call, holding the dataset, the labels and the corpus documents.
/// </returns>
public IPlaneContext ExecutePlaneMethod(IPlaneContext inputContext, ExperimentModelExecutionContext generalContext, ILogBuilder logger)
{
    if (notes != null)
    {
        notes.logStartPhase("[1] Entity Plane - execution", "");
    }

    IEntityPlaneContext context = inputContext as IEntityPlaneContext;

    CorpusPlaneContext outputContext = new CorpusPlaneContext();
    outputContext.provider.StoreAndReceive(context);
    outputContext.dataset = context.dataset;

    // ---------------- rendering procedure
    Dictionary<WebSiteDocumentsSet, List<TextDocumentSet>> renderIndex = new Dictionary<WebSiteDocumentsSet, List<TextDocumentSet>>();
    Dictionary<string, SpaceLabel> labels = new Dictionary<string, SpaceLabel>();
    Dictionary<WebSiteDocuments, TextDocumentSet> sitesToRenders = new Dictionary<WebSiteDocuments, TextDocumentSet>();
    Dictionary<String, WebSiteDocuments> inputSites = new Dictionary<string, WebSiteDocuments>();
    Dictionary<String, TextDocumentSet> inputTextRenders = new Dictionary<string, TextDocumentSet>();
    Dictionary<WebSiteDocuments, List<SpaceLabel>> inputSiteVsLabels = new Dictionary<WebSiteDocuments, List<SpaceLabel>>();

    Int32 c = 0;

    // rendering
    foreach (WebSiteDocumentsSet docSet in context.dataset)
    {
        // Label of this document set. It is kept in a local so the sites below can be linked
        // to it directly: the former lookup labels[docSet.name] failed for sets with a null or
        // empty name, because those are registered under SpaceLabel.UNKNOWN.
        SpaceLabel setLabel;

        if (docSet.name.isNullOrEmpty() || docSet.name == SpaceLabel.UNKNOWN)
        {
            setLabel = new SpaceLabel(SpaceLabel.UNKNOWN);
            outputContext.space.label_unknown = setLabel;
            labels.Add(SpaceLabel.UNKNOWN, setLabel);
        }
        else
        {
            setLabel = new SpaceLabel(docSet.name);
            labels.Add(setLabel.name, setLabel);
            outputContext.space.labels.Add(setLabel);
        }

        String datasetSignature = context.dataset.GetDataSetSignature();

        // ---- render
        List<TextDocumentSet> textSetForLabel = new List<TextDocumentSet>();

        if (CacheProvider.IsReady)
        {
            foreach (WebSiteDocuments site in docSet)
            {
                TextDocumentSet tds = CacheProvider.GetCached<TextDocumentSet>(setupSignature, datasetSignature, site.domain);

                if (tds == null)
                {
                    // Cache miss: render the site and store the result.
                    tds = render.RenderSiteDocuments(site, logger);
                    CacheProvider.SetCached(setupSignature, datasetSignature, tds.name, tds);
                }
                else
                {
                    tds.name = site.domain;
                }

                textSetForLabel.Add(tds);
            }
        }
        else
        {
            // <--- performs the rendering
            textSetForLabel = render.RenderDocumentSet(docSet, logger);

            // NOTE(review): SetCached is called although CacheProvider.IsReady is false -
            // confirm that the provider tolerates this.
            foreach (TextDocumentSet ws in textSetForLabel)
            {
                CacheProvider.SetCached(setupSignature, datasetSignature, ws.name, ws);
            }
        }

        textSetForLabel.ForEach(x => inputTextRenders.Add(x.name, x));

        // --- rest of indexing
        docSet.ForEach(x => inputSites.Add(x.domain, x));
        renderIndex.Add(docSet, textSetForLabel);

        foreach (WebSiteDocuments site in docSet)
        {
            inputSiteVsLabels.Add(site, new List<SpaceLabel> { setLabel });
            c++;
        }
    }

    if (notes != null)
    {
        notes.log("Text document for [" + c + "] entities created");
    }

    // tmp index: pairs every input site with the render stored under the same key
    foreach (String key in inputSites.Keys)
    {
        sitesToRenders.Add(inputSites[key], inputTextRenders[key]);
    }

    // page in site filtering
    if (filter.IsEnabled)
    {
        Dictionary<WebSiteDocuments, TextDocumentSet> renderIndexFiltered = new Dictionary<WebSiteDocuments, TextDocumentSet>();

        filter.Learn(inputTextRenders.Values);

        foreach (KeyValuePair<WebSiteDocuments, TextDocumentSet> pair in sitesToRenders)
        {
            renderIndexFiltered.Add(pair.Key, filter.FilterDocumentSet(pair.Value));
        }

        sitesToRenders = renderIndexFiltered;
    }

    Dictionary<String, TextDocumentSet> TextDocumentsByDomainName = new Dictionary<string, TextDocumentSet>();
    foreach (var pair in sitesToRenders)
    {
        TextDocumentsByDomainName.Add(pair.Key.domain, pair.Value);
    }

    // blending pages into single page per web site
    // DoBlendPagesIntoSingleEntity = blender.options.HasFlag(DocumentBlenderFunctionOptions.separatePages);
    Boolean keepSeparated = blender.DoKeepPagesSeparated;

    foreach (var pair in renderIndex)
    {
        foreach (TextDocumentSet entitySet in pair.Value)
        {
            TextDocumentSet selectedTexts = TextDocumentsByDomainName[entitySet.name];
            WebSiteDocuments web = inputSites[entitySet.name];
            IEnumerable<string> label = inputSiteVsLabels[web].Select(x => x.name);

            // NOTE(review): when keepSeparated is true the pages are blended into ONE document,
            // otherwise into separate documents - this reads as inverted relative to the flag
            // name; behavior is left as found, please confirm against DoKeepPagesSeparated.
            if (keepSeparated)
            {
                TextDocument doc = blender.blendToTextDocument(selectedTexts);
                doc.labels.AddRange(label);
                outputContext.corpus_documents.Add(doc);
            }
            else
            {
                var docs = blender.blendToSeparateTextDocuments(selectedTexts);
                foreach (TextDocument doc in docs)
                {
                    doc.labels.AddRange(label);
                    outputContext.corpus_documents.Add(doc);
                }
            }
        }
    }

    if (notes != null)
    {
        notes.logEndPhase();
    }

    return outputContext;
}
/// <summary>
/// Passes the provider of the given context to <c>Receive</c> and then stores the
/// context itself via <c>StoreContext</c>.
/// </summary>
/// <param name="context">The context whose provider is received and which is then stored.</param>
public void StoreAndReceive(IPlaneContext context)
{
    // Receive first, store second - same order as before.
    var sourceProvider = context.provider;
    Receive(sourceProvider);

    StoreContext(context);
}