/// <summary>
/// Builds a category model for every known label of the space model.
/// </summary>
/// <remarks>
/// Labels whose name equals <see cref="SpaceLabel.UNKNOWN" /> are skipped. For each remaining label the
/// documents linked to it are collected into a new <see cref="SpaceCategoryModel" />, which is then
/// registered both in the label-to-category links and in the list of categories of the space model.
/// </remarks>
/// <param name="context">The context, providing the space model to be extended with category models.</param>
/// <param name="log">The log that receives the progress message.</param>
public void SpaceModelCategories(OperationContext context, ILogBuilder log)
{
    log.log("Space model categories");

    var space = context.spaceModel;

    foreach (SpaceLabel currentLabel in space.labels)
    {
        // No category model is created for the "unknown" label.
        if (currentLabel.name == SpaceLabel.UNKNOWN)
        {
            continue;
        }

        var linkedDocuments = space.LabelToDocumentLinks.GetAllLinked(currentLabel);
        var model = new SpaceCategoryModel(currentLabel, linkedDocuments);

        // Third argument is presumably the link weight - confirm against the link collection API.
        space.LabelToCategoryLinks.Add(currentLabel, model, 1);
        space.categories.Add(model);
    }
}
/// <summary>
/// Executes the plane method, invoking contained functions according to the settings:
/// models the corpus documents, selects the features, creates the category models and
/// builds the weighted vectors for documents and categories.
/// </summary>
/// <param name="inputContext">The input context - related to this plane. It is cast to <see cref="ICorpusPlaneContext" />.</param>
/// <param name="generalContext">General execution context, attached to the <see cref="T:imbNLP.Toolkit.Planes.PlanesMethodDesign" /></param>
/// <param name="logger">The logger.</param>
/// <returns>
/// A <see cref="VectorPlaneContext" /> carrying the vector space (document and category vectors)
/// and the label-to-document links of the corpus space model.
/// </returns>
public IPlaneContext ExecutePlaneMethod(IPlaneContext inputContext, ExperimentModelExecutionContext generalContext, ILogBuilder logger)
{
    notes.logStartPhase("[2] Corpus Plane - execution", "");

    ICorpusPlaneContext context = (ICorpusPlaneContext)inputContext;
    VectorPlaneContext outputContext = new VectorPlaneContext();
    outputContext.provider.StoreAndReceive(context);

    context.stemmContext = new StemmingContext(stemmer);

    Dictionary<String, SpaceDocumentModel> documentVsModel = new Dictionary<string, SpaceDocumentModel>();

    // modelling the documents
    foreach (TextDocument doc in context.corpus_documents)
    {
        SpaceDocumentModel model = spaceConstructor.ConstructDocument(doc.content, doc.name, context.space, context.stemmContext, tokenizer);
        List<SpaceLabel> labels = spaceConstructor.GetLabels(doc.labels, context.space);

        // The document counts as "unknown" unless at least one of its labels is a known one.
        Boolean isUnknownLabel = true;
        foreach (SpaceLabel label in labels)
        {
            if (label.name != SpaceLabel.UNKNOWN)
            {
                isUnknownLabel = false;
            }
            context.space.LabelToDocumentLinks.Add(label, model, 1);
        }

        context.space.documents.Add(model);

        // Only labeled documents contribute their terms to the corpus dictionary.
        if (!isUnknownLabel)
        {
            context.space.terms.MergeDictionary(model.terms);
        }

        documentVsModel.Add(doc.name, model);
    }

    if (generalContext.reportOptions.HasFlag(PlanesReportOptions.report_fold_textrender))
    {
        foreach (TextDocument doc in context.corpus_documents)
        {
            String prefix = doc.labels.FirstOrDefault();
            if (prefix.isNullOrEmpty())
            {
                prefix = SpaceLabel.UNKNOWN;
            }

            String fn = prefix + "_" + doc.name;
            String pth = notes.folder_entity.pathFor(fn.getFilename("txt"), imbSCI.Data.enums.getWritableFileMode.overwrite, "Textual representation of website [" + doc.name + "], produced by rendering and blending settings", true);
            doc.content.saveStringToFile(pth, imbSCI.Data.enums.getWritableFileMode.overwrite);
        }
    }

    if (generalContext.reportOptions.HasFlag(PlanesReportOptions.report_fold_stats))
    {
        foreach (WebSiteDocumentsSet ds in context.dataset)
        {
            DataTable dt = ds.MakeTable(documentVsModel);
            notes.SaveDataTable(dt, notes.folder_entity);
        }

        var dt_vsm = context.space.LabelToDocumentLinks.MakeTable("LabelToDocument", "Relationships between labels and documents in the primary Vector Space Model");
        notes.SaveDataTable(dt_vsm, notes.folder_corpus);
    }

    if (generalContext.reportOptions.HasFlag(PlanesReportOptions.report_corpusDictionary))
    {
        notes.SaveDataTable(context.space.terms.MakeTable("corpus_stats", "Training set dictionary, after stemming", generalContext.DictionaryReportLimit), notes.folder_corpus);
    }

    #region SELECTING THE FEATURES
    // forming corpus global weight
    context.SelectedFeatures = new WeightDictionary();
    List<KeyValuePair<string, double>> filter_result = filter.SelectFeatures(context.space);

    // FV is the term whitelist handed to the weight model when the vectors are built below.
    List<string> FV = new List<string>();
    FV.AddRange(filter_result.Select(x => x.Key));

    if (filter_result.Any())
    {
        foreach (var pair in filter_result)
        {
            context.SelectedFeatures.AddEntry(pair.Key, pair.Value);
        }

        if (generalContext.reportOptions.HasFlag(PlanesReportOptions.report_selectedFeatures))
        {
            notes.SaveDataTable(context.SelectedFeatures.MakeTable("selected_features", "Features selected for BoW construction", new List<string>() { filter.function.shortName }, generalContext.DictionaryReportLimit), notes.folder_corpus);
        }
    }
    else
    {
        logger.log("-- Feature selection function returned zero set. All features [" + context.space.terms.Count + "] are therefore accepted as selected.");
        var tkns = context.space.terms.GetTokens();
        foreach (var tkn in tkns)
        {
            context.SelectedFeatures.AddEntry(tkn, 1);

            // Keep the whitelist in sync with the selected features: without this FV stays empty
            // in the fallback case, although all features were just declared as selected.
            // NOTE(review): presumably GetWeights only weights whitelisted terms - confirm.
            FV.Add(tkn);
        }
    }
    #endregion

    notes.log("Selected features [" + context.SelectedFeatures.entries.Count + "] by [" + filter.functionSettings.functionName + "]");

    outputContext.vectorSpace = new Vectors.VectorSpace();

    // creating category models for all known labels
    foreach (SpaceLabel label in context.space.labels)
    {
        if (label.name != SpaceLabel.UNKNOWN)
        {
            // Linked documents are looked up only for labels that actually get a category model.
            var docs = context.space.LabelToDocumentLinks.GetAllLinked(label);

            SpaceCategoryModel categoryModel = new SpaceCategoryModel(label, docs);
            context.space.LabelToCategoryLinks.Add(label, categoryModel, 1);
            context.space.categories.Add(categoryModel);

            notes.log("Class [" + categoryModel.name + "] BoW model created - terms[" + categoryModel.terms.Count + "] ");
        }
    }

    outputContext.LabelToDocumentLinks = context.space.LabelToDocumentLinks;

    // preparing the model
    weightModel.PrepareTheModel(context.space);

    // The report option does not change during execution, so it is evaluated once for all loops below.
    Boolean reportBoWModels = generalContext.reportOptions.HasFlag(PlanesReportOptions.report_documentBoWModels);

    // building document VSM
    foreach (SpaceDocumentModel docModel in context.space.documents)
    {
        var wd = weightModel.GetWeights(FV, docModel, context.space);
        VectorDocument docVec = new VectorDocument(docModel.name);
        docVec.terms = wd;

        if (reportBoWModels)
        {
            DataTable dt = wd.MakeTable("docVec_" + docModel.name, "Document vector model", null, 10000);
            notes.SaveDataTable(dt, notes.folder_vector);
        }

        outputContext.vectorSpace.documents.Add(docVec);
    }

    // building category VSM
    foreach (SpaceCategoryModel catModel in context.space.categories)
    {
        var wd = weightModel.GetWeights(FV, catModel, context.space);
        VectorLabel catVec = new VectorLabel(catModel.name);
        catVec.terms = wd;

        if (reportBoWModels)
        {
            // Description corrected: this table holds a category vector, not a document vector.
            DataTable dt = wd.MakeTable("catVec_" + catModel.name, "Category vector model", null, 10000);
            notes.SaveDataTable(dt, notes.folder_vector);
        }

        outputContext.vectorSpace.labels.Add(catVec);
    }

    if (reportBoWModels)
    {
        foreach (SpaceCategoryModel catModel in context.space.categories)
        {
            var dt = catModel.terms.MakeTable("cat_" + catModel.name, "Vector Space BoW weighted model, representing a category");
            notes.SaveDataTable(dt, notes.folder_vector);
        }
    }

    notes.logEndPhase();

    return outputContext;
}