Ejemplo n.º 1
0
        /// <summary>
        /// Runs the entity, corpus, vector and feature plane methods in sequence, each one receiving the
        /// output of the previous one, then evaluates the test results and stores the evaluation reports.
        /// </summary>
        /// <param name="inputContext">The input context, handed to the entity plane method.</param>
        /// <param name="generalContext">General execution context; supplies the truth table, run name and averaging method, and receives the averaged report in its test summaries.</param>
        /// <param name="logger">The logger, passed on to every plane method and to the evaluation.</param>
        /// <returns>
        /// The <paramref name="generalContext"/> instance.
        /// </returns>
        public IPlaneContext ExecutePlaneMethod(IPlaneContext inputContext, ExperimentModelExecutionContext generalContext, ILogBuilder logger)
        {
            // declared outside the try block so the finally block can release its provider
            IFeaturePlaneContext featureContext = null;

            try
            {
                // each stage output is the next stage input; 'as' yields null when a stage returns an unexpected type
                ICorpusPlaneContext entityOutput = EntityMethod.ExecutePlaneMethod(inputContext, generalContext, logger) as ICorpusPlaneContext;

                IVectorPlaneContext corpusOutput = CorpusMethod.ExecutePlaneMethod(entityOutput, generalContext, logger) as IVectorPlaneContext;

                IFeaturePlaneContext vectorOutput = VectorMethod.ExecutePlaneMethod(corpusOutput, generalContext, logger) as IFeaturePlaneContext;

                featureContext = FeatureMethod.ExecutePlaneMethod(vectorOutput, generalContext, logger) as IFeaturePlaneContext;

                // --- the results reporting

                var evaluationMetrics = generalContext.truthTable.EvaluateTestResultsToMetricSet(featureContext.testResults, generalContext.runName + "-" + notes.folder.name, logger);

                // per-class rows first, followed by the summary row
                DataTableTypeExtended <classificationEval> inclassEvalTable = new DataTableTypeExtended <classificationEval>("inclass_evaluation", "Test results, per class");

                evaluationMetrics.GetAllEntries().ForEach(x => inclassEvalTable.AddRow(x));
                inclassEvalTable.AddRow(evaluationMetrics.GetSummary("Sum"));
                notes.SaveDataTable(inclassEvalTable, notes.folder_classification);

                classificationReport averagedReport = new classificationReport(evaluationMetrics, generalContext.averagingMethod);

                averagedReport.Classifier = FeatureMethod.classifier.name;
                averagedReport.saveObjectToXML(notes.folder_classification.pathFor(averagedReport.Name + ".xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized classification evaluation results summary"));

                generalContext.testSummaries.Add(averagedReport);

                averagedReport.ReportToLog(notes);

                return(generalContext);
            }
            finally
            {
                // release the providers even when a plane method or the reporting throws;
                // the null checks keep the clean-up from hiding the original exception
                if (featureContext != null && featureContext.provider != null)
                {
                    featureContext.provider.Dispose();
                }

                if (EntityMethod != null && EntityMethod.CacheProvider != null)
                {
                    EntityMethod.CacheProvider.Dispose();
                }
            }
        }
        /// <summary>
        /// Executes the vector plane: constructs a feature vector for every document of the incoming vector space
        /// and carries the label-to-document links over into the resulting feature space.
        /// </summary>
        /// <param name="inputContext">The input context - related to this plane; it is cast to <c>IVectorPlaneContext</c>.</param>
        /// <param name="generalContext">General execution context, attached to the <see cref="T:imbNLP.Toolkit.Planes.PlanesMethodDesign" />; its report options decide whether the feature space table is saved.</param>
        /// <param name="logger">The logger. Not referenced by this method.</param>
        /// <returns>
        /// The newly created <c>FeaturePlaneContext</c>, filled with the feature vectors and label associations.
        /// </returns>
        public IPlaneContext ExecutePlaneMethod(IPlaneContext inputContext, ExperimentModelExecutionContext generalContext, ILogBuilder logger)
        {
            notes.logStartPhase("[3] Vector Plane - execution", "");

            var vectorPlane  = (IVectorPlaneContext)inputContext;
            var featurePlane = new FeaturePlaneContext();

            // pass the incoming context to the provider of the new context, then ask it for the corpus context
            featurePlane.provider.StoreAndReceive(vectorPlane);
            ICorpusPlaneContext corpusPlane = featurePlane.provider.GetContext <CorpusPlaneContext>();

            // the constructor is deployed twice: with the vector space and with the selected features
            featureSpaceConstructor.Deploy(constructorSettings, vectorPlane.vectorSpace);
            featureSpaceConstructor.Deploy(constructorSettings, corpusPlane.SelectedFeatures);

            notes.log(":: Constructing feature vectors");

            // name -> feature vector lookup, used below to resolve the document side of each link
            var featureVectorsByName = new Dictionary <string, FeatureVector>();

            foreach (IVector documentVector in vectorPlane.vectorSpace.documents)
            {
                var featureVector = featureSpaceConstructor.ConstructFeatureVector(documentVector);
                featureVectorsByName.Add(featureVector.name, featureVector);
                featurePlane.featureSpace.documents.Add(featureVector);
            }

            // re-create every label/document link, pointing at the feature vector with the same name
            foreach (var association in vectorPlane.LabelToDocumentLinks.links)
            {
                var linkedVector = featureVectorsByName[association.NodeB.name];
                featurePlane.featureSpace.labelToDocumentAssociations.Add(linkedVector, association.NodeA, 1);
            }

            if (generalContext.reportOptions.HasFlag(PlanesReportOptions.report_featureVectors))
            {
                var featureSpaceTable = featurePlane.featureSpace.MakeTable(featureSpaceConstructor, "FeatureSpace", "Feature space");
                notes.SaveDataTable(featureSpaceTable, notes.folder_feature);
            }

            notes.logEndPhase();

            return(featurePlane);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Executes the corpus plane: models the corpus documents, saves the optional reports, selects the features,
        /// builds the category models and produces weighted vectors for documents and categories.
        /// </summary>
        /// <param name="inputContext">The input context - related to this plane; it is cast to <c>ICorpusPlaneContext</c>.</param>
        /// <param name="generalContext">General execution context, attached to the <see cref="T:imbNLP.Toolkit.Planes.PlanesMethodDesign" />; its report options decide which reports are saved.</param>
        /// <param name="logger">The logger.</param>
        /// <returns>
        /// A new <c>VectorPlaneContext</c> holding the constructed vector space and the label-to-document links.
        /// </returns>
        public IPlaneContext ExecutePlaneMethod(IPlaneContext inputContext, ExperimentModelExecutionContext generalContext, ILogBuilder logger)
        {
            notes.logStartPhase("[2] Corpus Plane - execution", "");

            ICorpusPlaneContext context       = (ICorpusPlaneContext)inputContext;
            VectorPlaneContext  outputContext = new VectorPlaneContext();

            outputContext.provider.StoreAndReceive(context);

            context.stemmContext = new StemmingContext(stemmer);

            Dictionary <String, SpaceDocumentModel> documentVsModel = ModelCorpusDocuments(context);

            SaveCorpusReports(context, generalContext, documentVsModel);

            List <string> FV = SelectCorpusFeatures(context, generalContext, logger);

            notes.log("Selected features [" + context.SelectedFeatures.entries.Count + "] by [" + filter.functionSettings.functionName + "]");

            outputContext.vectorSpace = new Vectors.VectorSpace();

            BuildCategoryModels(context);

            outputContext.LabelToDocumentLinks = context.space.LabelToDocumentLinks;

            // preparing the model - called after the category models were added to the space
            weightModel.PrepareTheModel(context.space);

            BuildVectorModels(context, outputContext, generalContext, FV);

            notes.logEndPhase();

            return(outputContext);
        }

        /// <summary>
        /// Constructs a document model for every corpus document, links it to its labels and adds it to the space.
        /// </summary>
        /// <param name="context">The corpus plane context whose space is being populated.</param>
        /// <returns>The constructed document models, keyed by document name.</returns>
        private Dictionary <String, SpaceDocumentModel> ModelCorpusDocuments(ICorpusPlaneContext context)
        {
            Dictionary <String, SpaceDocumentModel> documentVsModel = new Dictionary <string, SpaceDocumentModel>();

            foreach (TextDocument doc in context.corpus_documents)
            {
                SpaceDocumentModel model  = spaceConstructor.ConstructDocument(doc.content, doc.name, context.space, context.stemmContext, tokenizer);
                List <SpaceLabel>  labels = spaceConstructor.GetLabels(doc.labels, context.space);

                // the document counts as unlabeled unless at least one of its labels differs from UNKNOWN
                Boolean isUnknownLabel = true;
                foreach (SpaceLabel label in labels)
                {
                    if (label.name != SpaceLabel.UNKNOWN)
                    {
                        isUnknownLabel = false;
                    }
                    context.space.LabelToDocumentLinks.Add(label, model, 1);
                }
                context.space.documents.Add(model);

                // only documents with a known label contribute their terms to the space dictionary
                if (!isUnknownLabel)
                {
                    context.space.terms.MergeDictionary(model.terms);
                }

                documentVsModel.Add(doc.name, model);
            }

            return documentVsModel;
        }

        /// <summary>
        /// Saves the text renders, fold statistics and corpus dictionary reports enabled in the report options.
        /// </summary>
        /// <param name="context">The corpus plane context providing documents, dataset and space.</param>
        /// <param name="generalContext">General execution context carrying the report options and the dictionary report limit.</param>
        /// <param name="documentVsModel">Document models keyed by document name, used for the per-set tables.</param>
        private void SaveCorpusReports(ICorpusPlaneContext context, ExperimentModelExecutionContext generalContext, Dictionary <String, SpaceDocumentModel> documentVsModel)
        {
            if (generalContext.reportOptions.HasFlag(PlanesReportOptions.report_fold_textrender))
            {
                foreach (TextDocument doc in context.corpus_documents)
                {
                    // the file name is prefixed with the first label, or with UNKNOWN when there is none
                    String prefix = doc.labels.FirstOrDefault();
                    if (prefix.isNullOrEmpty())
                    {
                        prefix = SpaceLabel.UNKNOWN;
                    }

                    String fn  = prefix + "_" + doc.name;
                    String pth = notes.folder_entity.pathFor(fn.getFilename("txt"), imbSCI.Data.enums.getWritableFileMode.overwrite, "Textual representation of website [" + doc.name + "], produced by rendering and blending settings", true);
                    doc.content.saveStringToFile(pth, imbSCI.Data.enums.getWritableFileMode.overwrite);
                }
            }

            if (generalContext.reportOptions.HasFlag(PlanesReportOptions.report_fold_stats))
            {
                foreach (WebSiteDocumentsSet ds in context.dataset)
                {
                    DataTable dt = ds.MakeTable(documentVsModel);
                    notes.SaveDataTable(dt, notes.folder_entity);
                }

                var dt_vsm = context.space.LabelToDocumentLinks.MakeTable("LabelToDocument", "Relationships between labels and documents in the primary Vector Space Model");
                notes.SaveDataTable(dt_vsm, notes.folder_corpus);
            }

            if (generalContext.reportOptions.HasFlag(PlanesReportOptions.report_corpusDictionary))
            {
                notes.SaveDataTable(context.space.terms.MakeTable("corpus_stats", "Training set dictionary, after stemming", generalContext.DictionaryReportLimit), notes.folder_corpus);
            }
        }

        /// <summary>
        /// Runs the feature filter over the space and stores the outcome in the context's selected features.
        /// When the filter returns nothing, every term of the space is added with weight 1.
        /// </summary>
        /// <param name="context">The corpus plane context; its <c>SelectedFeatures</c> is replaced by a new dictionary.</param>
        /// <param name="generalContext">General execution context carrying the report options and the dictionary report limit.</param>
        /// <param name="logger">The logger, used when the filter returns an empty result.</param>
        /// <returns>The keys returned by the filter, in the order the filter returned them.</returns>
        private List <string> SelectCorpusFeatures(ICorpusPlaneContext context, ExperimentModelExecutionContext generalContext, ILogBuilder logger)
        {
            // forming corpus global weight
            context.SelectedFeatures = new WeightDictionary();
            List <KeyValuePair <string, double> > filter_result = filter.SelectFeatures(context.space);
            List <string> FV = filter_result.Select(x => x.Key).ToList();

            if (filter_result.Any())
            {
                foreach (var pair in filter_result)
                {
                    context.SelectedFeatures.AddEntry(pair.Key, pair.Value);
                }

                if (generalContext.reportOptions.HasFlag(PlanesReportOptions.report_selectedFeatures))
                {
                    notes.SaveDataTable(context.SelectedFeatures.MakeTable("selected_features", "Features selected for BoW construction", new List <string>()
                    {
                        filter.function.shortName
                    }, generalContext.DictionaryReportLimit), notes.folder_corpus);
                }
            }
            else
            {
                logger.log("-- Feature selection function returned zero set. All features [" + context.space.terms.Count + "] are therefore accepted as selected.");

                // NOTE(review): SelectedFeatures receives every term here, but the returned list stays empty -
                // confirm that weightModel.GetWeights handles an empty feature list as intended.
                var tkns = context.space.terms.GetTokens();
                foreach (var tkn in tkns)
                {
                    context.SelectedFeatures.AddEntry(tkn, 1);
                }
            }

            return FV;
        }

        /// <summary>
        /// Creates a category model for every label of the space except the UNKNOWN label and registers it in the space.
        /// </summary>
        /// <param name="context">The corpus plane context whose space receives the category models.</param>
        private void BuildCategoryModels(ICorpusPlaneContext context)
        {
            foreach (SpaceLabel label in context.space.labels)
            {
                var docs = context.space.LabelToDocumentLinks.GetAllLinked(label);
                if (label.name != SpaceLabel.UNKNOWN)
                {
                    SpaceCategoryModel categoryModel = new SpaceCategoryModel(label, docs);
                    context.space.LabelToCategoryLinks.Add(label, categoryModel, 1);

                    context.space.categories.Add(categoryModel);

                    notes.log("Class [" + categoryModel.name + "] BoW model created - terms[" + categoryModel.terms.Count + "] ");
                }
            }
        }

        /// <summary>
        /// Builds the weighted vectors for all document and category models and adds them to the output vector space,
        /// saving the per-model tables when the document BoW model report is enabled.
        /// </summary>
        /// <param name="context">The corpus plane context providing the document and category models.</param>
        /// <param name="outputContext">The output context whose vector space receives the vectors.</param>
        /// <param name="generalContext">General execution context carrying the report options.</param>
        /// <param name="FV">The feature list handed to the weight model.</param>
        private void BuildVectorModels(ICorpusPlaneContext context, VectorPlaneContext outputContext, ExperimentModelExecutionContext generalContext, List <string> FV)
        {
            // evaluated once; the flag is not modified inside this method
            Boolean reportBoWModels = generalContext.reportOptions.HasFlag(PlanesReportOptions.report_documentBoWModels);

            // building document VSM
            foreach (SpaceDocumentModel docModel in context.space.documents)
            {
                var            wd     = weightModel.GetWeights(FV, docModel, context.space);
                VectorDocument docVec = new VectorDocument(docModel.name);
                docVec.terms = wd;

                if (reportBoWModels)
                {
                    DataTable dt = wd.MakeTable("docVec_" + docModel.name, "Document vector model", null, 10000);
                    notes.SaveDataTable(dt, notes.folder_vector);
                }
                outputContext.vectorSpace.documents.Add(docVec);
            }

            // building category VSM
            foreach (SpaceCategoryModel catModel in context.space.categories)
            {
                var         wd     = weightModel.GetWeights(FV, catModel, context.space);
                VectorLabel catVec = new VectorLabel(catModel.name);
                catVec.terms = wd;

                if (reportBoWModels)
                {
                    DataTable dt = wd.MakeTable("catVec_" + catModel.name, "Document vector model", null, 10000);
                    notes.SaveDataTable(dt, notes.folder_vector);
                }

                outputContext.vectorSpace.labels.Add(catVec);
            }

            // the category term tables are saved after all vectors were built, in a separate pass
            if (reportBoWModels)
            {
                foreach (SpaceCategoryModel catModel in context.space.categories)
                {
                    var dt = catModel.terms.MakeTable("cat_" + catModel.name, "Vector Space BoW weighted model, representing a category");
                    notes.SaveDataTable(dt, notes.folder_vector);
                }
            }
        }