/// <summary>
/// Runs the complete pipeline - entity, corpus, vector and feature plane, in that order - and reports the classification results
/// </summary>
/// <param name="inputContext">The input context, handed to the entity plane.</param>
/// <param name="generalContext">General execution context: supplies the truth table, the run name and the averaging method, and receives the averaged report in its test summaries.</param>
/// <param name="logger">The logger, passed to every plane and to the evaluation of the test results.</param>
/// <returns>
/// The <paramref name="generalContext" /> instance
/// </returns>
/// <exception cref="T:System.InvalidCastException">A plane returned a context that does not implement the interface expected by the next plane.</exception>
public IPlaneContext ExecutePlaneMethod(IPlaneContext inputContext, ExperimentModelExecutionContext generalContext, ILogBuilder logger)
{
    // Direct casts instead of "as": a wrongly-typed plane result fails right here with an InvalidCastException,
    // instead of being silently turned into null and failing later with a NullReferenceException.
    ICorpusPlaneContext entityPlaneOutput = (ICorpusPlaneContext)EntityMethod.ExecutePlaneMethod(inputContext, generalContext, logger);
    IVectorPlaneContext corpusPlaneOutput = (IVectorPlaneContext)CorpusMethod.ExecutePlaneMethod(entityPlaneOutput, generalContext, logger);
    IFeaturePlaneContext vectorPlaneOutput = (IFeaturePlaneContext)VectorMethod.ExecutePlaneMethod(corpusPlaneOutput, generalContext, logger);
    IFeaturePlaneContext featureContext = (IFeaturePlaneContext)FeatureMethod.ExecutePlaneMethod(vectorPlaneOutput, generalContext, logger);

    // --- the results reporting
    var evaluationMetrics = generalContext.truthTable.EvaluateTestResultsToMetricSet(featureContext.testResults, generalContext.runName + "-" + notes.folder.name, logger);

    // per-class evaluation table: one row per entry, followed by the summary row
    DataTableTypeExtended<classificationEval> inclassEvalTable = new DataTableTypeExtended<classificationEval>("inclass_evaluation", "Test results, per class");
    evaluationMetrics.GetAllEntries().ForEach(x => inclassEvalTable.AddRow(x));
    inclassEvalTable.AddRow(evaluationMetrics.GetSummary("Sum"));
    notes.SaveDataTable(inclassEvalTable, notes.folder_classification);

    // averaged report: serialized to XML, registered in the general context and written to the log
    classificationReport averagedReport = new classificationReport(evaluationMetrics, generalContext.averagingMethod);
    averagedReport.Classifier = FeatureMethod.classifier.name;
    averagedReport.saveObjectToXML(notes.folder_classification.pathFor(averagedReport.Name + ".xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized classification evaluation results summary"));

    generalContext.testSummaries.Add(averagedReport);
    averagedReport.ReportToLog(notes);

    // NOTE(review): the providers are disposed only when everything above succeeded - confirm whether they
    // should also be released (try/finally) when a plane or the reporting throws.
    featureContext.provider.Dispose();
    EntityMethod.CacheProvider.Dispose();

    return generalContext;
}
/// <summary>
/// Executes the vector plane: constructs a feature vector for every document of the incoming vector space
/// </summary>
/// <param name="inputContext">The input context - related to this plane. It is cast to <c>IVectorPlaneContext</c>.</param>
/// <param name="generalContext">General execution context, attached to the <see cref="T:imbNLP.Toolkit.Planes.PlanesMethodDesign" />. Its report options decide whether the feature space table is saved.</param>
/// <param name="logger">The logger.</param>
/// <returns>
/// Newly created <c>FeaturePlaneContext</c>, holding the constructed feature vectors and their label associations
/// </returns>
public IPlaneContext ExecutePlaneMethod(IPlaneContext inputContext, ExperimentModelExecutionContext generalContext, ILogBuilder logger)
{
    notes.logStartPhase("[3] Vector Plane - execution", "");

    IVectorPlaneContext vectorPlane = (IVectorPlaneContext)inputContext;

    // the output context takes over the incoming one, which also gives access to the corpus plane context
    FeaturePlaneContext featurePlane = new FeaturePlaneContext();
    featurePlane.provider.StoreAndReceive(vectorPlane);
    ICorpusPlaneContext corpusPlane = featurePlane.provider.GetContext<CorpusPlaneContext>();

    // the constructor is deployed twice: with the vector space and with the selected features
    featureSpaceConstructor.Deploy(constructorSettings, vectorPlane.vectorSpace);
    featureSpaceConstructor.Deploy(constructorSettings, corpusPlane.SelectedFeatures);

    // name -> feature vector index, used below to resolve the label-to-document links
    Dictionary<string, FeatureVector> vectorsByName = new Dictionary<string, FeatureVector>();

    notes.log(":: Constructing feature vectors");

    foreach (IVector documentVector in vectorPlane.vectorSpace.documents)
    {
        var featureVector = featureSpaceConstructor.ConstructFeatureVector(documentVector);

        vectorsByName.Add(featureVector.name, featureVector);
        featurePlane.featureSpace.documents.Add(featureVector);
    }

    // re-creating the label associations: NodeB names the document, NodeA is the other end of the link
    foreach (var association in vectorPlane.LabelToDocumentLinks.links)
    {
        featurePlane.featureSpace.labelToDocumentAssociations.Add(
            vectorsByName[association.NodeB.name],
            association.NodeA,
            1);
    }

    bool reportFeatureVectors = generalContext.reportOptions.HasFlag(PlanesReportOptions.report_featureVectors);
    if (reportFeatureVectors)
    {
        var featureSpaceTable = featurePlane.featureSpace.MakeTable(featureSpaceConstructor, "FeatureSpace", "Feature space");
        notes.SaveDataTable(featureSpaceTable, notes.folder_feature);
    }

    notes.logEndPhase();

    return featurePlane;
}
/// <summary>
/// Executes the corpus plane: models the corpus documents, selects the features and builds the weighted vectors for documents and categories
/// </summary>
/// <param name="inputContext">The input context - related to this plane. It is cast to <c>ICorpusPlaneContext</c>.</param>
/// <param name="generalContext">General execution context, attached to the <see cref="T:imbNLP.Toolkit.Planes.PlanesMethodDesign" />. Its report options and dictionary report limit control which tables and files are saved.</param>
/// <param name="logger">The logger. Receives the message about an empty feature selection result.</param>
/// <returns>
/// Newly created <c>VectorPlaneContext</c>, holding the vector space and the label-to-document links
/// </returns>
public IPlaneContext ExecutePlaneMethod(IPlaneContext inputContext, ExperimentModelExecutionContext generalContext, ILogBuilder logger)
{
    notes.logStartPhase("[2] Corpus Plane - execution", "");

    ICorpusPlaneContext context = (ICorpusPlaneContext)inputContext;

    VectorPlaneContext outputContext = new VectorPlaneContext();
    outputContext.provider.StoreAndReceive(context);

    context.stemmContext = new StemmingContext(stemmer);

    Dictionary<String, SpaceDocumentModel> documentVsModel = new Dictionary<string, SpaceDocumentModel>();

    // modelling the documents
    foreach (TextDocument doc in context.corpus_documents)
    {
        SpaceDocumentModel model = spaceConstructor.ConstructDocument(doc.content, doc.name, context.space, context.stemmContext, tokenizer);

        List<SpaceLabel> labels = spaceConstructor.GetLabels(doc.labels, context.space);

        // stays true when the document has no label at all, or only the UNKNOWN label
        Boolean isUnknownLabel = true;

        foreach (SpaceLabel label in labels)
        {
            if (label.name != SpaceLabel.UNKNOWN)
            {
                isUnknownLabel = false;
            }
            context.space.LabelToDocumentLinks.Add(label, model, 1);
        }

        context.space.documents.Add(model);

        // only documents with a known label contribute their terms to the corpus dictionary
        if (!isUnknownLabel)
        {
            context.space.terms.MergeDictionary(model.terms);
        }

        documentVsModel.Add(doc.name, model);
    }

    // report: textual content of each document, saved under a file name prefixed with its first label
    if (generalContext.reportOptions.HasFlag(PlanesReportOptions.report_fold_textrender))
    {
        foreach (TextDocument doc in context.corpus_documents)
        {
            String prefix = doc.labels.FirstOrDefault();
            if (prefix.isNullOrEmpty())
            {
                prefix = SpaceLabel.UNKNOWN;
            }

            String fn = prefix + "_" + doc.name;
            String pth = notes.folder_entity.pathFor(fn.getFilename("txt"), imbSCI.Data.enums.getWritableFileMode.overwrite, "Textual representation of website [" + doc.name + "], produced by rendering and blending settings", true);
            doc.content.saveStringToFile(pth, imbSCI.Data.enums.getWritableFileMode.overwrite);
        }
    }

    // report: per-dataset tables and the label-to-document relationships
    if (generalContext.reportOptions.HasFlag(PlanesReportOptions.report_fold_stats))
    {
        foreach (WebSiteDocumentsSet ds in context.dataset)
        {
            DataTable dt = ds.MakeTable(documentVsModel);
            notes.SaveDataTable(dt, notes.folder_entity);
        }

        var dt_vsm = context.space.LabelToDocumentLinks.MakeTable("LabelToDocument", "Relationships between labels and documents in the primary Vector Space Model");
        notes.SaveDataTable(dt_vsm, notes.folder_corpus);
    }

    // report: the corpus dictionary
    if (generalContext.reportOptions.HasFlag(PlanesReportOptions.report_corpusDictionary))
    {
        notes.SaveDataTable(context.space.terms.MakeTable("corpus_stats", "Training set dictionary, after stemming", generalContext.DictionaryReportLimit), notes.folder_corpus);
    }

    // ---- selecting the features

    // forming corpus global weight
    context.SelectedFeatures = new WeightDictionary();

    List<KeyValuePair<string, double>> filter_result = filter.SelectFeatures(context.space);

    // names of the selected features, passed to the weight model when the vectors are built
    List<string> FV = new List<string>();

    if (filter_result.Any())
    {
        foreach (var pair in filter_result)
        {
            FV.Add(pair.Key);
            context.SelectedFeatures.AddEntry(pair.Key, pair.Value);
        }

        if (generalContext.reportOptions.HasFlag(PlanesReportOptions.report_selectedFeatures))
        {
            notes.SaveDataTable(context.SelectedFeatures.MakeTable("selected_features", "Features selected for BoW construction", new List<string>() { filter.function.shortName }, generalContext.DictionaryReportLimit), notes.folder_corpus);
        }
    }
    else
    {
        logger.log("-- Feature selection function returned zero set. All features [" + context.space.terms.Count + "] are therefore accepted as selected.");

        var tkns = context.space.terms.GetTokens();

        foreach (var tkn in tkns)
        {
            // The fallback has to fill the feature list as well: it used to stay empty here, so the weight model
            // was asked for an empty feature set although all features were reported as selected.
            FV.Add(tkn);
            context.SelectedFeatures.AddEntry(tkn, 1);
        }
    }

    notes.log("Selected features [" + context.SelectedFeatures.entries.Count + "] by [" + filter.functionSettings.functionName + "]");

    // ---- building the vector space

    outputContext.vectorSpace = new Vectors.VectorSpace();

    // a category model is created for every label, except for the UNKNOWN one
    foreach (SpaceLabel label in context.space.labels)
    {
        var docs = context.space.LabelToDocumentLinks.GetAllLinked(label);

        if (label.name != SpaceLabel.UNKNOWN)
        {
            SpaceCategoryModel categoryModel = new SpaceCategoryModel(label, docs);
            context.space.LabelToCategoryLinks.Add(label, categoryModel, 1);
            context.space.categories.Add(categoryModel);

            notes.log("Class [" + categoryModel.name + "] BoW model created - terms[" + categoryModel.terms.Count + "] ");
        }
    }

    outputContext.LabelToDocumentLinks = context.space.LabelToDocumentLinks;

    // preparing the model
    weightModel.PrepareTheModel(context.space);

    // the flag is the same for every document and category, so it is checked once
    Boolean reportBoWModels = generalContext.reportOptions.HasFlag(PlanesReportOptions.report_documentBoWModels);

    // building document VSM
    foreach (SpaceDocumentModel docModel in context.space.documents)
    {
        var wd = weightModel.GetWeights(FV, docModel, context.space);
        VectorDocument docVec = new VectorDocument(docModel.name);
        docVec.terms = wd;

        if (reportBoWModels)
        {
            DataTable dt = wd.MakeTable("docVec_" + docModel.name, "Document vector model", null, 10000);
            notes.SaveDataTable(dt, notes.folder_vector);
        }

        outputContext.vectorSpace.documents.Add(docVec);
    }

    // building category VSM
    foreach (SpaceCategoryModel catModel in context.space.categories)
    {
        var wd = weightModel.GetWeights(FV, catModel, context.space);
        VectorLabel catVec = new VectorLabel(catModel.name);
        catVec.terms = wd;

        if (reportBoWModels)
        {
            // NOTE(review): the description below says "Document vector model" although the table holds a
            // category vector - looks like a copy-paste leftover; kept unchanged, confirm before rewording.
            DataTable dt = wd.MakeTable("catVec_" + catModel.name, "Document vector model", null, 10000);
            notes.SaveDataTable(dt, notes.folder_vector);
        }

        outputContext.vectorSpace.labels.Add(catVec);
    }

    // report: terms of each category model
    if (reportBoWModels)
    {
        foreach (SpaceCategoryModel catModel in context.space.categories)
        {
            var dt = catModel.terms.MakeTable("cat_" + catModel.name, "Vector Space BoW weighted model, representing a category");
            notes.SaveDataTable(dt, notes.folder_vector);
        }
    }

    notes.logEndPhase();

    return outputContext;
}