/// <summary>
/// Prepares the specified context: loads the term weight dictionary from the context
/// folder and collects stemmed query tokens for later scoring.
/// </summary>
/// <param name="context">The document-select context; must have <c>spaceModel</c> set.</param>
/// <param name="log">The log builder receiving progress and error messages.</param>
/// <exception cref="ArgumentException">Thrown when <c>context.spaceModel</c> is null.</exception>
public override void Prepare(DocumentSelectResult context, ILogBuilder log)
{
    // Load the weight dictionary for this factor from the context folder.
    weightDictionary = WeightDictionary.LoadFile(WeightDictionary.GetDictionaryFilename(dictionaryFile, context.folder), log);

    if (context.spaceModel == null)
    {
        String msg = "Error: TermWeight factor requires SpaceModel declared in the context for operation";
        throw new ArgumentException(msg, nameof(context));
    }

    // NOTE(review): this branch dereferences context.query while the guard is
    // isNullOrEmpty() — the condition looks inverted (expected !isNullOrEmpty()).
    // The same pattern appears in sibling Prepare() overrides; confirm intent before changing.
    if (context.query.isNullOrEmpty())
    {
        context.query.QueryTerms = context.query.QueryTerms.Trim();

        // Tokenize the query (minimum token length 4) and keep the stemmed forms.
        List<String> tkns = context.query.QueryTerms.getTokens(true, true, true, false, 4);

        foreach (String tkn in tkns)
        {
            queryTerms.Add(context.stemmingContext.Stem(tkn));
        }
    }
}
/// <summary>
/// Prepares the specified context: loads and deploys the feature weight model,
/// restores stored model data when available (or builds the model from the space model),
/// and collects stemmed query tokens.
/// </summary>
/// <param name="context">The document-select context; must have <c>spaceModel</c> set.</param>
/// <param name="log">The log builder receiving progress and error messages.</param>
/// <exception cref="ArgumentException">Thrown when <c>context.spaceModel</c> is null.</exception>
public override void Prepare(DocumentSelectResult context, ILogBuilder log)
{
    // Resolve the model definition and model data file paths inside the context folder.
    String p_m = FeatureWeightModel.GetModelDefinitionFilename(modelDefinitionFile, context.folder);
    String p_d = FeatureWeightModel.GetModelDataFilename(modelDefinitionFile, context.folder);

    TermWeightModel = FeatureWeightModel.LoadModel(p_m, log);
    TermWeightModel.Deploy(log);

    if (context.spaceModel == null)
    {
        String msg = "Error: TermWeight factor requires SpaceModel declared in the context for operation";
        throw new ArgumentException(msg, nameof(context));
    }

    if (File.Exists(p_d) && useStoredData)
    {
        // Restore precomputed model data instead of rebuilding it from the space model.
        WeightingModelDataSet data = objectSerialization.loadObjectFromXML<WeightingModelDataSet>(p_d, log);
        TermWeightModel.LoadModelDataSet(data, log);

        if (useSelectedFeatures)
        {
            // The "_sf" suffix marks the stored selected-features dictionary.
            SelectedTerms = WeightDictionary.LoadFile(WeightDictionary.GetDictionaryFilename(modelDefinitionFile + "_sf", context.folder), log);
        }
    }
    else
    {
        TermWeightModel.PrepareTheModel(context.spaceModel, log);
    }

    // NOTE(review): this branch dereferences context.query while the guard is
    // isNullOrEmpty() — the condition looks inverted (expected !isNullOrEmpty()).
    // The same pattern appears in sibling Prepare() overrides; confirm intent before changing.
    if (context.query.isNullOrEmpty())
    {
        context.query.QueryTerms = context.query.QueryTerms.Trim();

        // Tokenize the query (minimum token length 4) and keep the stemmed forms.
        List<String> tkns = context.query.QueryTerms.getTokens(true, true, true, false, 4);

        foreach (String tkn in tkns)
        {
            queryTerms.Add(context.stemmingContext.Stem(tkn));
        }
    }
}
/// <summary>
/// Deploys the feature filter, runs feature selection over the space model, and saves
/// both the pre-filter weighted dictionary and the post-filter selected dictionary.
/// </summary>
/// <param name="log">The log builder receiving progress messages.</param>
/// <param name="_space">The space model to select features from.</param>
/// <param name="folder">The folder used for filter deployment and dictionary output.</param>
public void DeployAndRun(ILogBuilder log, SpaceModel _space, folderNode folder)
{
    filter.Deploy(log, folder);

    // Dictionary names encode the filter limit so runs with different limits do not collide.
    weightedFeatures = new WeightDictionary(name + "_weg" + filter.limit.ToString(), "weighted features, before filter");
    selectedFeatures = new WeightDictionary(name + "_sel" + filter.limit.ToString(), "selected weighted features"); // fixed typo: "featyres"

    var selected = filter.SelectFeatures(_space, log, folder, weightedFeatures);

    foreach (var pair in selected)
    {
        selectedFeatures.AddEntry(pair.Key, pair.Value);
    }

    weightedFeatures.Save(folder, log, WeightDictionary.GetDictionaryFilename(weightedFeatures.name, folder));
    selectedFeatures.Save(folder, log, WeightDictionary.GetDictionaryFilename(selectedFeatures.name, folder));
}
/// <summary>
/// Prepares the specified context: loads the score dictionary and, when machine learning
/// is enabled, deploys the term weight model, resolves the selected-terms vocabulary,
/// builds feature vectors for all scored items and trains the configured classifier.
/// </summary>
/// <param name="context">The document-select context; must have <c>spaceModel</c> set when ML is used.</param>
/// <param name="log">The log builder receiving progress and error messages.</param>
/// <exception cref="ArgumentException">
/// Thrown when the score dictionary cannot be found, or when ML is enabled and
/// <c>context.spaceModel</c> is null.
/// </exception>
public override void Prepare(DocumentSelectResult context, ILogBuilder log)
{
    scoreDictionary = FeatureVectorDictionaryWithDimensions.LoadFile(context.folder, dictionaryFile, log);

    if (scoreDictionary == null)
    {
        String msg = "Error: Failed to find score dictionary [" + dictionaryFile + "] in " + context.folder.path;
        throw new ArgumentException(msg, nameof(context));
    }

    if (useMachineLearning)
    {
        #region --------------- PREPARING TERM WEIGHT MODEL

        String p_m = FeatureWeightModel.GetModelDefinitionFilename(modelDefinitionFile, context.folder);
        String p_d = FeatureWeightModel.GetModelDataFilename(modelDefinitionFile, context.folder);

        // Reuse an already-assigned model; otherwise load it from the definition file.
        if (TermWeightModel == null)
        {
            TermWeightModel = FeatureWeightModel.LoadModel(p_m, log);
        }

        TermWeightModel.Deploy(log);

        if (context.spaceModel == null)
        {
            String msg = "Error: TermWeight factor requires SpaceModel declared in the context for operation";
            throw new ArgumentException(msg, nameof(context));
        }

        if (File.Exists(p_d) && useStoredData)
        {
            // Restore precomputed model data instead of rebuilding it from the space model.
            WeightingModelDataSet data = objectSerialization.loadObjectFromXML<WeightingModelDataSet>(p_d, log);
            TermWeightModel.LoadModelDataSet(data, log);

            if (useSelectedFeatures)
            {
                // The "_sf" suffix marks the stored selected-features dictionary.
                SelectedTerms = WeightDictionary.LoadFile(WeightDictionary.GetDictionaryFilename(modelDefinitionFile + "_sf", context.folder), log);
            }
        }
        else
        {
            TermWeightModel.PrepareTheModel(context.spaceModel, log);
        }

        // Fall back to the context's selected features when no stored dictionary was loaded.
        // Null guard added: SelectedTerms is only assigned on the stored-data path above.
        if (SelectedTerms == null || SelectedTerms.Count == 0)
        {
            SelectedTerms = context.selectedFeatures;
        }

        // Build the token vocabulary for feature-vector construction.
        List<String> sel_tkns = new List<String>();
        sel_tkns.AddRange(SelectedTerms.index.Values.Select(x => x.name));

        if (!sel_tkns.Any())
        {
            // Last resort: use all known-label terms from the space model.
            sel_tkns.AddRange(context.spaceModel.terms_known_label.GetTokens());
        }

        #endregion

        fvConstructor.Deploy(featureMethod.constructor, sel_tkns);

        classifier = featureMethod.classifierSettings.GetClassifier();

        // Map document names to label ids according to the configured criterion.
        sc_id = scoreDictionary.GetVectorsWithLabelID(null, criterion).ToNameVsLabelID();

        List<FeatureVectorWithLabelID> trainingSet = new List<FeatureVectorWithLabelID>();

        foreach (var item in context.items)
        {
            // Only items present in the score dictionary contribute to training.
            if (sc_id.ContainsKey(item.AssignedID))
            {
                WeightDictionary dc_vec = TermWeightModel.GetWeights(sel_tkns, item.spaceDocument, context.spaceModel);
                var n_vec = fvConstructor.ConstructFeatureVector(dc_vec, item.AssignedID);
                FeatureVectorWithLabelID id_vec = new FeatureVectorWithLabelID(n_vec, sc_id[item.AssignedID]);
                trainingSet.Add(id_vec);
            }
        }

        log.log("Training [" + classifier.name + "] with [" + sc_id.Count + "] feature vectors.");
        classifier.DoTraining(trainingSet, log);
    }
}
/// <summary>
/// Executes the weight-table construction operation for a single fold: renders the data set,
/// populates the space model, selects features, prepares the weight model and saves the
/// resulting element-factor table plus the model itself. Skips all work if outputs already
/// exist and <c>setup.skipIfExisting</c> is set.
/// </summary>
/// <param name="logger">The log builder receiving progress messages.</param>
/// <param name="executionContextMain">The operation context carried across the fold.</param>
/// <param name="executionContextExtra">Extra experiment-level context (unused here).</param>
/// <returns>The fold/context pair produced by this operation.</returns>
public override ExperimentDataSetFoldContextPair<OperationContext> Execute(ILogBuilder logger, OperationContext executionContextMain = null, ExperimentModelExecutionContext executionContextExtra = null)
{
    ExperimentDataSetFoldContextPair<OperationContext> output = new ExperimentDataSetFoldContextPair<OperationContext>(fold, executionContextMain);

    Open();

    // Expected output files: model definition, model data and the weight table.
    String p_m = FeatureWeightModel.GetModelDefinitionFilename(setup.OutputFilename, fold_notes.folder);
    String p_d = FeatureWeightModel.GetModelDataFilename(setup.OutputFilename, fold_notes.folder);
    String w_t = WeightDictionary.GetDictionaryFilename(setup.OutputFilename, fold_notes.folder);

    Boolean skip = false;

    if (setup.skipIfExisting)
    {
        // Only skip when all three artifacts are already present.
        if (File.Exists(p_m) && File.Exists(p_d) && File.Exists(w_t))
        {
            logger.log("WeightTable [" + p_d + "] found, skipping the operation");
            skip = true;
        }
    }

    if (!skip)
    {
        output.context.DeployDataSet(fold, logger);

        entityOperation.TextRendering(output.context, notes);

        corpusOperation.SpaceModelPopulation(output.context, notes);
        corpusOperation.SpaceModelCategories(output.context, notes);
        corpusOperation.FeatureSelection(output.context, notes, requirements.MayUseSelectedFeatures);

        // Persist the selected features alongside the weight table ("_fs" suffix).
        output.context.SelectedFeatures.Save(fold_notes.folder, notes, setup.OutputFilename + "_fs");

        corpusOperation.weightModel.PrepareTheModel(output.context.spaceModel, logger);

        var wt_s = corpusOperation.weightModel.GetElementFactors(output.context.SelectedFeatures.GetKeys(), output.context.spaceModel);
        wt_s.Save(fold_notes.folder, notes, setup.OutputFilename);

        corpusOperation.weightModel.Save(setup.OutputFilename, fold_notes.folder, notes);

        OperationContextReport reportOperation = new OperationContextReport();
        reportOperation.DeploySettingsBase(notes);
        reportOperation.GenerateReports(output.context, setup.reportOptions, notes);
    }

    Close();

    return (output);
}
/// <summary>
/// Executes the primary-to-secondary projection operation for a single fold: renders two
/// views of the data set, pairs their documents, ranks the primary view, projects primary
/// term scores onto the secondary view and saves the resulting weight table. Skips all
/// work if the output already exists and <c>setup.skipIfExisting</c> is set.
/// </summary>
/// <param name="logger">The log builder receiving progress messages.</param>
/// <param name="executionContextMain">The operation context carried across the fold.</param>
/// <param name="executionContextExtra">Extra experiment-level context (unused here).</param>
/// <returns>The fold/context pair; its context holds the secondary view after execution.</returns>
public override ExperimentDataSetFoldContextPair<OperationContext> Execute(ILogBuilder logger, OperationContext executionContextMain = null, ExperimentModelExecutionContext executionContextExtra = null)
{
    ExperimentDataSetFoldContextPair<OperationContext> output = new ExperimentDataSetFoldContextPair<OperationContext>(fold, executionContextMain);

    Open();

    Boolean skip = false;

    String p_m = WeightDictionary.GetDictionaryFilename(setup.OutputFilename, fold_notes.folder);

    if (setup.skipIfExisting)
    {
        if (File.Exists(p_m))
        {
            logger.log("WeightTable [" + p_m + "] found, skipping the operation");
            skip = true;
        }
    }

    if (!skip)
    {
        notes.log("Rendering primary view");

        // ------------------- PRIMARY CONTEXT
        output.context.DeployDataSet(fold, logger);

        primaryEntityOperation.TextRendering(output.context, notes);

        corpusOperation.SpaceModelPopulation(output.context, notes);
        corpusOperation.SpaceModelCategories(output.context, notes);
        corpusOperation.FeatureSelection(output.context, notes, requirements.MayUseSelectedFeatures);

        OperationContext primaryContext = output.context;

        // ------------------- SECONDARY CONTEXT
        output.context = new OperationContext();

        notes.log("Rendering secondary view");

        output.context.DeployDataSet(fold, logger);

        secondaryEntityOperation.TextRendering(output.context, notes);

        corpusOperation.SpaceModelPopulation(output.context, notes);
        corpusOperation.SpaceModelCategories(output.context, notes);
        corpusOperation.FeatureSelection(output.context, notes, requirements.MayUseSelectedFeatures);

        OperationContext secondaryContext = output.context;

        // Pair primary and secondary renderings of the same documents.
        ProjectionDictionary projectionPairs = DocumentRankingTools.ConstructPairDictionary(primaryContext.spaceModel.documents, secondaryContext.spaceModel.documents);

        DocumentSelectResult drmContext = output.context.PrepareContext(rankingOperation, fold_notes.folder, logger);

        drmContext = rankingOperation.ExecuteEvaluation(drmContext, logger);

        // NOTE(review): missing space before the operation name in this description string;
        // left as-is to preserve any output-matching consumers.
        drmContext.description = "Document score assigned to the primary text render" + name;
        drmContext.saveObjectToXML(fold_notes.folder.pathFor("DS_" + name + "_projection_score.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Projection within [" + name + "] operation"));

        // Project ranked primary scores onto term frequencies, then build the weight table.
        TokenFrequencyAndScoreDictionary tokenFrequencyAndScoreDictionary = ProjectionTools.ProjectPrimaryTermsToScores(projectionPairs, drmContext, logger);

        WeightDictionary wt = tokenFrequencyAndScoreDictionary.ConstructWeightDictionary();

        wt.name = setup.OutputFilename;
        wt.description = "Projected PrimaryView to ScoreTable - WeightTable, constructed from [" + projectionPairs.Count + "] render pairs. Document ranking: " + drmContext.description;
        wt.Save(fold_notes.folder, logger, setup.OutputFilename);
    }

    Close();

    return (output);
}