/// <summary>
/// Scores the entry either through the trained classifier (machine-learning mode)
/// or by compressing the pre-computed score vector found in the score dictionary.
/// </summary>
/// <param name="entry">Document entry to score.</param>
/// <param name="context">Selection context holding the space model.</param>
/// <param name="log">Log builder for diagnostics.</param>
/// <returns>Score for the entry; 0 when no score vector is known for it.</returns>
public override double Score(DocumentSelectResultEntry entry, DocumentSelectResult context, ILogBuilder log)
{
    if (!useMachineLearning)
    {
        // Dictionary mode: compress the stored numeric vector into a scalar score.
        if (!scoreDictionary.ContainsKey(entry.AssignedID))
        {
            return 0;
        }
        return scoreDictionary[entry.AssignedID].CompressNumericVector(vectorCompression);
    }

    // ML mode: build the feature vector for this entry and let the classifier score it.
    WeightDictionary weights = TermWeightModel.GetWeights(SelectedTerms.GetKeys(), entry.spaceDocument, context.spaceModel);
    var featureVector = fvConstructor.ConstructFeatureVector(weights, entry.AssignedID);

    Int32 labelId = -1; // -1 signals "label unknown" to the classifier
    if (sc_id.ContainsKey(entry.AssignedID))
    {
        labelId = sc_id[entry.AssignedID];
    }

    return classifier.DoScore(featureVector, log, labelId);
}
/// <summary>
/// Computes the score for an entry by summing term weights over the document's tokens,
/// restricted to the context's selected features, this factor's selected terms and —
/// when any query terms exist — the query terms.
/// </summary>
/// <param name="entry">Document entry to score.</param>
/// <param name="context">Selection context holding the space model and selected features.</param>
/// <param name="log">Log builder for diagnostics.</param>
/// <returns>Sum of term weights for all tokens that pass the filters.</returns>
public override double Score(DocumentSelectResultEntry entry, DocumentSelectResult context, ILogBuilder log)
{
    Double sum = 0;

    foreach (String token in entry.spaceDocument.terms.GetTokens())
    {
        // Skip tokens outside the context's selected-feature set (only when a non-empty set is declared).
        if (context.selectedFeatures != null && context.selectedFeatures.Count > 0 && !context.selectedFeatures.ContainsKey(token))
        {
            continue;
        }

        // Skip tokens outside this factor's own selected-term dictionary (only when non-empty).
        if (SelectedTerms != null && SelectedTerms.Count > 0 && !SelectedTerms.ContainsKey(token))
        {
            continue;
        }

        // When query terms exist only they contribute; otherwise every surviving token contributes.
        if (!queryTerms.Any() || queryTerms.Contains(token))
        {
            sum += TermWeightModel.GetWeight(token, entry.spaceDocument, context.spaceModel);
        }
    }

    return sum;
}
/// <summary>
/// Prepares the term-weight model: loads the model definition, deploys it, loads stored
/// model data (or builds the model from the space model), and stems the query terms.
/// </summary>
/// <param name="context">Selection context; must carry a <c>spaceModel</c>.</param>
/// <param name="log">Log builder for diagnostics.</param>
/// <exception cref="ArgumentException">Thrown when <c>context.spaceModel</c> is null.</exception>
public override void Prepare(DocumentSelectResult context, ILogBuilder log)
{
    String p_m = FeatureWeightModel.GetModelDefinitionFilename(modelDefinitionFile, context.folder);
    String p_d = FeatureWeightModel.GetModelDataFilename(modelDefinitionFile, context.folder);

    TermWeightModel = FeatureWeightModel.LoadModel(p_m, log);
    TermWeightModel.Deploy(log);

    if (context.spaceModel == null)
    {
        String msg = "Error: TermWeight factor requires SpaceModel declared in the context for operation";
        throw new ArgumentException(msg, nameof(context));
    }

    if (File.Exists(p_d) && useStoredData)
    {
        // Stored model data exists: load it instead of re-computing from the space model.
        WeightingModelDataSet data = objectSerialization.loadObjectFromXML<WeightingModelDataSet>(p_d, log);
        TermWeightModel.LoadModelDataSet(data, log);

        if (useSelectedFeatures)
        {
            SelectedTerms = WeightDictionary.LoadFile(WeightDictionary.GetDictionaryFilename(modelDefinitionFile + "_sf", context.folder), log);
        }
    }
    else
    {
        TermWeightModel.PrepareTheModel(context.spaceModel, log);
    }

    // BUGFIX: the original tested isNullOrEmpty() WITHOUT negation, so query terms were
    // stemmed only when the query was null/empty — and dereferencing context.query.QueryTerms
    // on a null query would throw. The check is now negated so a real query is processed.
    if (!context.query.isNullOrEmpty())
    {
        context.query.QueryTerms = context.query.QueryTerms.Trim();

        List<String> tkns = context.query.QueryTerms.getTokens(true, true, true, false, 4);
        foreach (String tkn in tkns)
        {
            queryTerms.Add(context.stemmingContext.Stem(tkn));
        }
    }
}
/// <summary>
/// Builds this factor's signature: the computation flags (comma-space replaced by
/// underscore), a bracketed model tag, and the weight signature suffix.
/// </summary>
/// <returns>Signature string identifying this factor's configuration.</returns>
public override String GetSignature()
{
    String signature = computation.ToString().Replace(", ", "_");

    // Prefer the deployed model's own signature; fall back to the definition filename.
    String modelTag = "";
    if (TermWeightModel != null)
    {
        modelTag = "[" + TermWeightModel.GetSignature() + "]";
    }
    else if (!modelDefinitionFile.isNullOrEmpty())
    {
        modelTag = "[" + modelDefinitionFile + "]";
    }

    return signature + modelTag + GetWeightSignature();
}
/// <summary>
/// Builds this factor's signature: the fixed "TW" prefix, a bracketed model tag,
/// and the weight signature suffix.
/// </summary>
/// <returns>Signature string identifying this factor's configuration.</returns>
public override String GetSignature()
{
    // Prefer the deployed model's own signature; fall back to the definition filename.
    String modelTag;
    if (TermWeightModel != null)
    {
        modelTag = "[" + TermWeightModel.GetSignature() + "]";
    }
    else if (!modelDefinitionFile.isNullOrEmpty())
    {
        modelTag = "[" + modelDefinitionFile + "]";
    }
    else
    {
        modelTag = "";
    }

    return "TW" + modelTag + GetWeightSignature();
}
/// <summary>
/// Builds this factor's signature: "ML" plus classifier/model tags in machine-learning
/// mode, or "VI" plus the dictionary filename otherwise, followed by the weight
/// signature suffix.
/// </summary>
/// <returns>Signature string identifying this factor's configuration.</returns>
public override String GetSignature()
{
    String output;

    if (useMachineLearning)
    {
        output = "ML";

        if (featureMethod?.classifierSettings != null)
        {
            output += "[" + featureMethod.classifierSettings.GetSignature() + "]";
        }

        // Prefer the deployed model's own signature; fall back to the definition filename.
        if (TermWeightModel != null)
        {
            output += "[" + TermWeightModel.GetSignature() + "]";
        }
        else if (!modelDefinitionFile.isNullOrEmpty())
        {
            output += "[" + modelDefinitionFile + "]";
        }
    }
    else
    {
        output = "VI";

        if (!dictionaryFile.isNullOrEmpty())
        {
            output += "[" + dictionaryFile + "]";
        }
    }

    return output + GetWeightSignature();
}
/// <summary>
/// Prepares the factor: resolves model definition/data paths, loads and deploys the
/// term-weight model, then builds the similarity vector dictionary that matches the
/// configured computation mode.
/// </summary>
/// <param name="context">Selection context holding the folder and space model.</param>
/// <param name="log">Log builder for diagnostics.</param>
/// <exception cref="InvalidOperationException">Thrown when no term-weight model is assigned and none could be loaded from disk.</exception>
public override void Prepare(DocumentSelectResult context, ILogBuilder log)
{
    String p_m = "";
    String p_d = "";

    modelDefinitionFile = modelDefinitionFile.Replace("*", "");
    if (!modelDefinitionFile.isNullOrEmpty())
    {
        p_m = FeatureWeightModel.GetModelDefinitionFilename(modelDefinitionFile, context.folder);
        p_d = FeatureWeightModel.GetModelDataFilename(modelDefinitionFile, context.folder);
    }

    if (TermWeightModel == null)
    {
        log.log("Loading model from [" + p_m + "]");
        if (File.Exists(p_m))
        {
            TermWeightModel = FeatureWeightModel.LoadModel(p_m, log);
        }
    }

    // BUGFIX: the original dereferenced TermWeightModel unconditionally, throwing a bare
    // NullReferenceException when no model was assigned and the definition file was missing.
    // Fail explicitly with a diagnostic message instead.
    if (TermWeightModel == null)
    {
        throw new InvalidOperationException("TermWeightModel is not set and no model definition was found at [" + p_m + "]");
    }

    TermWeightModel.Deploy(log);

    if (File.Exists(p_d) && UseModelData)
    {
        // Stored model data exists: load it instead of re-computing from the space model.
        log.log("Loading model data from [" + p_d + "]");
        var dataset = objectSerialization.loadObjectFromXML<WeightingModelDataSet>(p_d, log);
        TermWeightModel.LoadModelDataSet(dataset, log);
    }
    else
    {
        log.log("Preparing model ...");
        TermWeightModel.PrepareTheModel(context.spaceModel, log);
    }

    // Each computation flag selects a different similarity transformation of the context.
    // NOTE(review): pageDivergence maps to page similarity against the site scope — confirm intended.
    if (computation.HasFlag(ScoreComputationModeEnum.category))
    {
        vectorDictionary = context.TransformToFVDictionaryAsCategorySimilarity(TermWeightModel, function, log);
    }
    else if (computation.HasFlag(ScoreComputationModeEnum.site))
    {
        vectorDictionary = context.TransformToFVDictionaryAsSiteSimilarity(TermWeightModel, function, log);
    }
    else if (computation.HasFlag(ScoreComputationModeEnum.pageDivergence))
    {
        vectorDictionary = context.TransformToFVDictionaryAsPageSimilarity(TermWeightModel, function, ScoreComputationModeEnum.site, log);
    }
    else if (computation.HasFlag(ScoreComputationModeEnum.pagesOfCategory))
    {
        vectorDictionary = context.TransformToFVDictionaryAsPageSimilarity(TermWeightModel, function, ScoreComputationModeEnum.category, log);
    }
    else if (computation.HasFlag(ScoreComputationModeEnum.pagesOfDataset))
    {
        vectorDictionary = context.TransformToFVDictionaryAsPageSimilarity(TermWeightModel, function, ScoreComputationModeEnum.dataset, log);
    }

    log.log("Category similarity ready ... [" + computation.ToString() + "]");
}
/// <summary>
/// Prepares the factor: loads the score dictionary and — in machine-learning mode —
/// prepares the term-weight model, builds feature vectors for every labeled context
/// item, and trains the classifier on them.
/// </summary>
/// <param name="context">The context; supplies the folder, space model and items.</param>
/// <param name="log">The log.</param>
/// <exception cref="ArgumentException">Thrown when the score dictionary cannot be found, or when the space model is missing from the context.</exception>
public override void Prepare(DocumentSelectResult context, ILogBuilder log)
{
    //context.folder.GetOrFindFiles("*", dictionaryFile + "*.xml");
    scoreDictionary = FeatureVectorDictionaryWithDimensions.LoadFile(context.folder, dictionaryFile, log); // WeightDictionary.LoadFile(WeightDictionary.GetDictionaryFilename(dictionaryFile, context.folder), log);

    if (scoreDictionary == null)
    {
        String msg = "Error: Failed to find score dictionary [" + dictionaryFile + "] in " + context.folder.path;
        throw new ArgumentException(msg, nameof(context));
    }

    if (useMachineLearning)
    {
        #region --------------- PREPARING TERM WEIGHT MODEL

        String p_m = FeatureWeightModel.GetModelDefinitionFilename(modelDefinitionFile, context.folder);
        String p_d = FeatureWeightModel.GetModelDataFilename(modelDefinitionFile, context.folder);

        if (TermWeightModel == null)
        {
            TermWeightModel = FeatureWeightModel.LoadModel(p_m, log);
        }

        TermWeightModel.Deploy(log);

        if (context.spaceModel == null)
        {
            String msg = "Error: TermWeight factor requires SpaceModel declared in the context for operation";
            throw new ArgumentException(msg, nameof(context));
        }

        if (File.Exists(p_d) && useStoredData)
        {
            // Stored model data found: load it instead of re-computing from the space model.
            WeightingModelDataSet data = objectSerialization.loadObjectFromXML<WeightingModelDataSet>(p_d, log);
            TermWeightModel.LoadModelDataSet(data, log);

            if (useSelectedFeatures)
            {
                SelectedTerms = WeightDictionary.LoadFile(WeightDictionary.GetDictionaryFilename(modelDefinitionFile + "_sf", context.folder), log);
            }
        }
        else
        {
            TermWeightModel.PrepareTheModel(context.spaceModel, log);
        }

        // NOTE(review): SelectedTerms is only assigned above when stored data + useSelectedFeatures
        // both apply; if it can still be null here this dereference throws — confirm the field is
        // initialized elsewhere (e.g. at declaration).
        if (SelectedTerms.Count == 0)
        {
            SelectedTerms = context.selectedFeatures;
        }

        // Token list used for feature-vector construction: the selected terms, or all
        // known-label terms of the space model as a fallback when none are selected.
        List<String> sel_tkns = new List<String>();
        sel_tkns.AddRange(SelectedTerms.index.Values.Select(x => x.name));

        if (!sel_tkns.Any())
        {
            sel_tkns.AddRange(context.spaceModel.terms_known_label.GetTokens());
        }

        #endregion

        fvConstructor.Deploy(featureMethod.constructor, sel_tkns);

        classifier = featureMethod.classifierSettings.GetClassifier();

        // Map item names to label IDs from the score dictionary, then build the training
        // set from every context item whose label is known.
        sc_id = scoreDictionary.GetVectorsWithLabelID(null, criterion).ToNameVsLabelID();

        List<FeatureVectorWithLabelID> trainingSet = new List<FeatureVectorWithLabelID>();
        foreach (var item in context.items)
        {
            if (sc_id.ContainsKey(item.AssignedID))
            {
                WeightDictionary dc_vec = TermWeightModel.GetWeights(sel_tkns, item.spaceDocument, context.spaceModel);
                var n_vec = fvConstructor.ConstructFeatureVector(dc_vec, item.AssignedID);
                FeatureVectorWithLabelID id_vec = new FeatureVectorWithLabelID(n_vec, sc_id[item.AssignedID]);
                trainingSet.Add(id_vec);
            }
        }

        log.log("Training [" + classifier.name + "] with [" + sc_id.Count + "] feature vectors.");

        classifier.DoTraining(trainingSet, log);
    }
}