/// <summary>
        /// Prepares the factor for execution: loads the weight dictionary for <c>dictionaryFile</c> and stems the query terms, if a query is set.
        /// </summary>
        /// <param name="context">The document selection context; must have a space model (<c>spaceModel</c>) declared.</param>
        /// <param name="log">The log builder.</param>
        /// <exception cref="ArgumentException">Thrown when <paramref name="context"/> has no <c>spaceModel</c> declared.</exception>
        public override void Prepare(DocumentSelectResult context, ILogBuilder log)
        {
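            // Load the precomputed term weight dictionary resolved from dictionaryFile within the context folder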
            weightDictionary = WeightDictionary.LoadFile(WeightDictionary.GetDictionaryFilename(dictionaryFile, context.folder), log);

            if (context.spaceModel == null)
            {
                String msg = "Error: TermWeight factor requires SpaceModel declared in the context for operation";
                throw new ArgumentException(msg, nameof(context));
            }

            // When a query is provided, trim and tokenize it, then stem each token for term matching
            if (!context.query.isNullOrEmpty())
            {
                context.query.QueryTerms = context.query.QueryTerms.Trim();

                List <String> tkns = context.query.QueryTerms.getTokens(true, true, true, false, 4);

                foreach (String tkn in tkns)
                {
                    queryTerms.Add(context.stemmingContext.Stem(tkn));
                }
            }
        }
        /// <summary>
        /// Prepares the factor for execution: loads and deploys the term weight model, restores stored model data when available, and stems the query terms, if a query is set.
        /// </summary>
        /// <param name="context">The document selection context; must have a space model (<c>spaceModel</c>) declared.</param>
        /// <param name="log">The log builder.</param>
        /// <exception cref="ArgumentException">Thrown when <paramref name="context"/> has no <c>spaceModel</c> declared.</exception>
        public override void Prepare(DocumentSelectResult context, ILogBuilder log)
        {
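            // Resolve the model definition (p_m) and model data (p_d) file paths, then load and deploy the persisted term weight model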
            String p_m = FeatureWeightModel.GetModelDefinitionFilename(modelDefinitionFile, context.folder);
            String p_d = FeatureWeightModel.GetModelDataFilename(modelDefinitionFile, context.folder);

            TermWeightModel = FeatureWeightModel.LoadModel(p_m, log);

            TermWeightModel.Deploy(log);

            if (context.spaceModel == null)
            {
                String msg = "Error: TermWeight factor requires SpaceModel declared in the context for operation";
                throw new ArgumentException(msg, nameof(context));
            }
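            // Reuse stored weighting data (and, optionally, the stored selected-feature dictionary) when available; otherwise build the model from the space model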

            if (File.Exists(p_d) && useStoredData)
            {
                WeightingModelDataSet data = objectSerialization.loadObjectFromXML <WeightingModelDataSet>(p_d, log);
                TermWeightModel.LoadModelDataSet(data, log);

                if (useSelectedFeatures)
                {
                    SelectedTerms = WeightDictionary.LoadFile(WeightDictionary.GetDictionaryFilename(modelDefinitionFile + "_sf", context.folder), log);
                }
            }
            else
            {
                TermWeightModel.PrepareTheModel(context.spaceModel, log);
            }

            // When a query is provided, trim and tokenize it, then stem each token for term matching
            if (!context.query.isNullOrEmpty())
            {
                context.query.QueryTerms = context.query.QueryTerms.Trim();

                List <String> tkns = context.query.QueryTerms.getTokens(true, true, true, false, 4);

                foreach (String tkn in tkns)
                {
                    queryTerms.Add(context.stemmingContext.Stem(tkn));
                }
            }
        }
        public void DeployAndRun(ILogBuilder log, SpaceModel _space, folderNode folder)
        {
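            // Deploy the feature filter, weight all features of the space model, keep the selected subset and persist both dictionaries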
            filter.Deploy(log, folder);

            weightedFeatures = new WeightDictionary(name + "_weg" + filter.limit.ToString(), "weighted features, before filter");
            selectedFeatures = new WeightDictionary(name + "_sel" + filter.limit.ToString(), "selected weighted features");

            var selected = filter.SelectFeatures(_space, log, folder, weightedFeatures);

            foreach (var pair in selected)
            {
                selectedFeatures.AddEntry(pair.Key, pair.Value);
            }

            weightedFeatures.Save(folder, log, WeightDictionary.GetDictionaryFilename(weightedFeatures.name, folder));
            selectedFeatures.Save(folder, log, WeightDictionary.GetDictionaryFilename(selectedFeatures.name, folder));
        }
        /// <summary>
        /// Prepares the factor for execution: loads the score dictionary and, when machine learning is enabled, prepares the term weight model and trains the classifier.
        /// </summary>
        /// <param name="context">The document selection context; must have a space model (<c>spaceModel</c>) declared.</param>
        /// <param name="log">The log builder.</param>
        /// <exception cref="ArgumentException">Thrown when the score dictionary cannot be found, or (with machine learning enabled) when <paramref name="context"/> has no <c>spaceModel</c> declared.</exception>
        public override void Prepare(DocumentSelectResult context, ILogBuilder log)
        {
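            // Load the per-document score dictionary referenced by dictionaryFile from the context folder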

            scoreDictionary = FeatureVectorDictionaryWithDimensions.LoadFile(context.folder, dictionaryFile, log); // WeightDictionary.LoadFile(WeightDictionary.GetDictionaryFilename(dictionaryFile, context.folder), log);

            if (scoreDictionary == null)
            {
                String msg = "Error: Failed to find score dictionary [" + dictionaryFile + "] in " + context.folder.path;
                throw new ArgumentException(msg, nameof(context));
            }

            if (useMachineLearning)
            {
                #region --------------- PREPARING TERM WEIGHT MODEL


                String p_m = FeatureWeightModel.GetModelDefinitionFilename(modelDefinitionFile, context.folder);
                String p_d = FeatureWeightModel.GetModelDataFilename(modelDefinitionFile, context.folder);


                if (TermWeightModel == null)
                {
                    TermWeightModel = FeatureWeightModel.LoadModel(p_m, log);
                }


                TermWeightModel.Deploy(log);

                if (context.spaceModel == null)
                {
                    String msg = "Error: TermWeight factor requires SpaceModel declared in the context for operation";
                    throw new ArgumentException(msg, nameof(context));
                }

                if (File.Exists(p_d) && useStoredData)
                {
                    WeightingModelDataSet data = objectSerialization.loadObjectFromXML <WeightingModelDataSet>(p_d, log);
                    TermWeightModel.LoadModelDataSet(data, log);

                    if (useSelectedFeatures)
                    {
                        SelectedTerms = WeightDictionary.LoadFile(WeightDictionary.GetDictionaryFilename(modelDefinitionFile + "_sf", context.folder), log);
                    }
                }
                else
                {
                    TermWeightModel.PrepareTheModel(context.spaceModel, log);
                }

                if ((SelectedTerms == null) || (SelectedTerms.Count == 0))
                {
                    SelectedTerms = context.selectedFeatures;
                }
                List <String> sel_tkns = new List <String>();

                sel_tkns.AddRange(SelectedTerms.index.Values.Select(x => x.name));

                if (!sel_tkns.Any())
                {
                    sel_tkns.AddRange(context.spaceModel.terms_known_label.GetTokens());
                }


                #endregion
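                // Configure the feature vector constructor with the selected terms and instantiate the classifier defined by the feature method settings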

                fvConstructor.Deploy(featureMethod.constructor, sel_tkns);

                classifier = featureMethod.classifierSettings.GetClassifier();

                sc_id = scoreDictionary.GetVectorsWithLabelID(null, criterion).ToNameVsLabelID();
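                // Build the training set: one labelled feature vector for each context item whose AssignedID has a score label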


                List <FeatureVectorWithLabelID> trainingSet = new List <FeatureVectorWithLabelID>();
                foreach (var item in context.items)
                {
                    if (sc_id.ContainsKey(item.AssignedID))
                    {
                        WeightDictionary dc_vec = TermWeightModel.GetWeights(sel_tkns, item.spaceDocument, context.spaceModel);


                        var n_vec = fvConstructor.ConstructFeatureVector(dc_vec, item.AssignedID);

                        FeatureVectorWithLabelID id_vec = new FeatureVectorWithLabelID(n_vec, sc_id[item.AssignedID]);

                        trainingSet.Add(id_vec);
                    }
                }


                log.log("Training [" + classifier.name + "] with [" + trainingSet.Count + "] feature vectors.");
                classifier.DoTraining(trainingSet, log);
            }
        }
        public override ExperimentDataSetFoldContextPair <OperationContext> Execute(ILogBuilder logger, OperationContext executionContextMain = null, ExperimentModelExecutionContext executionContextExtra = null)
        {
            ExperimentDataSetFoldContextPair <OperationContext> output = new ExperimentDataSetFoldContextPair <OperationContext>(fold, executionContextMain);

            Open();

            String p_m = FeatureWeightModel.GetModelDefinitionFilename(setup.OutputFilename, fold_notes.folder);

            String p_d = FeatureWeightModel.GetModelDataFilename(setup.OutputFilename, fold_notes.folder);

            String w_t = WeightDictionary.GetDictionaryFilename(setup.OutputFilename, fold_notes.folder);

            Boolean skip = false;
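            // When configured, skip the fold if the model definition, model data and weight table outputs already exist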

            if (setup.skipIfExisting)
            {
                if (File.Exists(p_m) && File.Exists(p_d) && File.Exists(w_t))
                {
                    logger.log("WeightTable [" + p_d + "] found, skipping the operation");
                    skip = true;
                }
            }

            if (!skip)
            {
                output.context.DeployDataSet(fold, logger);
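                // Render the document texts, populate the space model, derive category statistics and run feature selection for this fold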

                entityOperation.TextRendering(output.context, notes);

                /*
                 * entityOperation.TextPreblendFilter(output.context, notes);
                 *
                 * entityOperation.TextBlending(output.context, notes);
                 */

                corpusOperation.SpaceModelPopulation(output.context, notes);

                corpusOperation.SpaceModelCategories(output.context, notes);

                corpusOperation.FeatureSelection(output.context, notes, requirements.MayUseSelectedFeatures);

                output.context.SelectedFeatures.Save(fold_notes.folder, notes, setup.OutputFilename + "_fs");

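                // Prepare the term weight model from the space model, export per-feature factors and persist the model definition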

                corpusOperation.weightModel.PrepareTheModel(output.context.spaceModel, logger);

                var wt_s = corpusOperation.weightModel.GetElementFactors(output.context.SelectedFeatures.GetKeys(), output.context.spaceModel);

                wt_s.Save(fold_notes.folder, notes, setup.OutputFilename);

                corpusOperation.weightModel.Save(setup.OutputFilename, fold_notes.folder, notes);

                OperationContextReport reportOperation = new OperationContextReport();
                reportOperation.DeploySettingsBase(notes);

                reportOperation.GenerateReports(output.context, setup.reportOptions, notes);
            }

            Close();

            return(output);
        }
        public override ExperimentDataSetFoldContextPair <OperationContext> Execute(ILogBuilder logger, OperationContext executionContextMain = null, ExperimentModelExecutionContext executionContextExtra = null)
        {
            ExperimentDataSetFoldContextPair <OperationContext> output = new ExperimentDataSetFoldContextPair <OperationContext>(fold, executionContextMain);

            Open();

            Boolean skip = false;

            //  String fn = setup.OutputFilename;

            String p_m = WeightDictionary.GetDictionaryFilename(setup.OutputFilename, fold_notes.folder);  //FeatureWeightModel.GetModelDefinitionFilename(setup.OutputFilename, fold_notes.folder);

            //String p_d = FeatureWeightModel.GetModelDataFilename(setup.OutputFilename, fold_notes.folder);


            if (setup.skipIfExisting)
            {
                if (File.Exists(p_m))
                {
                    logger.log("WeightTable [" + p_m + "] found, skipping the operation");
                    skip = true;
                }
            }

            if (!skip)
            {
                notes.log("Rendering primary view");

                // ------------------- PRIMARY CONTEXT

                output.context.DeployDataSet(fold, logger);

                primaryEntityOperation.TextRendering(output.context, notes);

                //primaryEntityOperation.TextPreblendFilter(output.context, notes);

                //primaryEntityOperation.TextBlending(output.context, notes);


                corpusOperation.SpaceModelPopulation(output.context, notes);

                corpusOperation.SpaceModelCategories(output.context, notes);

                corpusOperation.FeatureSelection(output.context, notes, requirements.MayUseSelectedFeatures);

                OperationContext primaryContext = output.context;

                // ------------------- SECONDARY CONTEXT

                output.context = new OperationContext();

                notes.log("Rendering secondary view");

                output.context.DeployDataSet(fold, logger);

                secondaryEntityOperation.TextRendering(output.context, notes);

                //  secondaryEntityOperation.TextPreblendFilter(output.context, notes);

                // secondaryEntityOperation.TextBlending(output.context, notes);

                corpusOperation.SpaceModelPopulation(output.context, notes);

                corpusOperation.SpaceModelCategories(output.context, notes);

                corpusOperation.FeatureSelection(output.context, notes, requirements.MayUseSelectedFeatures);


                OperationContext secondaryContext = output.context;
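                // Pair the primary and secondary renders of the same documents, score them with the ranking operation, and persist the projection scores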



                ProjectionDictionary projectionPairs = DocumentRankingTools.ConstructPairDictionary(primaryContext.spaceModel.documents, secondaryContext.spaceModel.documents);

                DocumentSelectResult drmContext = output.context.PrepareContext(rankingOperation, fold_notes.folder, logger);
                drmContext             = rankingOperation.ExecuteEvaluation(drmContext, logger);
                drmContext.description = "Document score assigned to the primary text render " + name;
                drmContext.saveObjectToXML(fold_notes.folder.pathFor("DS_" + name + "_projection_score.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Projection within [" + name + "] operation"));
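                // Project primary-view term frequencies onto the document scores and convert the result into a weight dictionary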

                TokenFrequencyAndScoreDictionary tokenFrequencyAndScoreDictionary = ProjectionTools.ProjectPrimaryTermsToScores(projectionPairs, drmContext, logger);

                WeightDictionary wt = tokenFrequencyAndScoreDictionary.ConstructWeightDictionary();
                wt.name        = setup.OutputFilename;
                wt.description = "Projected PrimaryView to ScoreTable - WeightTable, constructed from [" + projectionPairs.Count + "] render pairs. Document ranking: " + drmContext.description;

                wt.Save(fold_notes.folder, logger, setup.OutputFilename);

                //                wt.saveObjectToXML(p_m);
            }


            Close();

            return(output);
        }