/// <summary>
/// Loads the document selection results found in <paramref name="folder"/> and merges them into one feature vector dictionary.
/// </summary>
/// <remarks>
/// Files are resolved with <c>folder.GetOrFindFiles(inputNames, searchPattern)</c>. When a loaded result has a name that
/// was already seen in this call, the result is renamed to its source file name (without extension) before it is added.
/// </remarks>
/// <param name="folder">The folder that is searched for input files.</param>
/// <param name="inputNames">The input names, forwarded to <c>GetOrFindFiles</c>.</param>
/// <param name="output">Log builder forwarded to <c>DocumentSelectResult.LoadFromFile</c>.</param>
/// <param name="searchPattern">The search pattern, forwarded to <c>GetOrFindFiles</c>.</param>
/// <returns>The dictionary produced by <c>DocumentRankingExtensions.TransformToFVDictionary</c> from the loaded results.</returns>
public static FeatureVectorDictionaryWithDimensions MergeDSRankings(folderNode folder, String inputNames, ILogBuilder output, String searchPattern = "DS_*_ranking.xml")
{
    List<string> filepaths = folder.GetOrFindFiles(inputNames, searchPattern);

    List<DocumentSelectResult> results = new List<DocumentSelectResult>();

    // Set instead of list: membership is checked once per loaded file.
    HashSet<String> existingNames = new HashSet<String>();

    foreach (var fp in filepaths)
    {
        var lr = DocumentSelectResult.LoadFromFile(fp, output);

        if (existingNames.Contains(lr.name))
        {
            // Name collision: fall back to the file name, which is only needed in this case.
            lr.name = Path.GetFileNameWithoutExtension(fp);
        }

        existingNames.Add(lr.name);
        results.Add(lr);
    }

    return DocumentRankingExtensions.TransformToFVDictionary(results);
}
/// <summary>Generates reports on precompiled resources for each fold</summary>
/// <remarks>
/// <para>For every fold procedure the method opens the procedure, merges the ranking files ("*_ranking.xml") and the
/// weight dictionaries ("*_wt.xml") found in the fold notes folder, turns each merge result into a data table,
/// saves both tables through the fold notes and closes the procedure.</para>
/// </remarks>
/// <param name="word">Not referenced by the method body.</param>
/// <param name="steps">Not referenced by the method body.</param>
/// <param name="debug">Not referenced by the method body.</param>
/// <seealso cref="aceOperationSetExecutorBase"/>
public void aceOperation_runReportOnResources(
    [Description("--")] String word = "word",
    [Description("--")] Int32 steps = 5,
    [Description("--")] Boolean debug = true)
{
    SetupDocumentSelection selectionSetup = docSelection.data.CloneViaXML();

    var foldProcedures = new ProceduralFolderFor<ProcedureCreateScoreSet, SetupDocumentSelection, OperationContext, ExperimentModelExecutionContext>(
        mainContext.folds,
        selectionSetup,
        mainContext.notes,
        parent);

    foreach (var procedure in foldProcedures)
    {
        procedure.Open();

        // Rankings report
        FeatureVectorDictionaryWithDimensions rankings = DocumentRankingExtensions.MergeDSRankings(procedure.fold_notes.folder, "", output, "*_ranking.xml");
        var rankingTable = rankings.Values.MakeTable(rankings.dimensions, "Rankings", procedure.description, mainContext.truthTable.label_index);
        procedure.fold_notes.SaveDataTable(rankingTable);

        // Weight tables report
        var weights = WeightDictionaryTools.MergeWeightDictionaries(procedure.fold_notes.folder, "", output, "*_wt.xml");
        var weightTable = weights.Values.MakeTable(weights.dimensions, "WeightTable", procedure.description);
        procedure.fold_notes.SaveDataTable(weightTable);

        procedure.Close();
    }
}
/// <summary>
/// Builds a single-dimension ("Correct") feature vector dictionary out of classification test results.
/// </summary>
/// <remarks>
/// For each test item the predicted label (<c>labels_without_unknown[labelID]</c>) is compared with the label
/// registered in <c>siteToLabel</c> under the item's vector name; dimension 0 of the created vector receives
/// <paramref name="correctScore"/> on a match and <paramref name="incorrectScore"/> otherwise.
/// </remarks>
/// <param name="testResults">The test results.</param>
/// <param name="_testName">Name of the test; used in the dimension description and as the last argument of the dimension registration.</param>
/// <param name="logger">The logger. Not referenced by the method body.</param>
/// <param name="correctScore">Score to assign for correct classification</param>
/// <param name="incorrectScore">Score to assign for incorrect classification</param>
/// <returns>Dictionary with one feature vector per test item.</returns>
public FeatureVectorDictionaryWithDimensions GetEvaluationAsFeatureVectorDictionary(List<FeatureVectorWithLabelID> testResults, String _testName, ILogBuilder logger, Double correctScore = 1, Double incorrectScore = -1)
{
    var evaluation = new FeatureVectorDictionaryWithDimensions();

    evaluation.dimensions.Add(
        "Correct",
        "Classification is performed correctly in [" + _testName + "] test",
        Feature.Settings.FeatureVectorDimensionType.precompiledDocumentScore,
        _testName);

    foreach (FeatureVectorWithLabelID item in testResults)
    {
        String predictedLabel = labels_without_unknown[item.labelID];
        String trueLabel = siteToLabel[item.vector.name];

        var scoreVector = evaluation.Create(item.name);
        scoreVector.dimensions[0] = (predictedLabel == trueLabel) ? correctScore : incorrectScore;

        evaluation.Add(scoreVector);
    }

    return evaluation;
}
/// <summary>
/// Loads the weight dictionaries found in <paramref name="folder"/> and merges them into one feature vector dictionary.
/// </summary>
/// <remarks>
/// Files are resolved with <c>folder.GetOrFindFiles(inputNames, searchPattern)</c>. Each loaded dictionary gets
/// "Source name: " plus its original name appended to its description, and is then renamed to the source file name
/// (without extension) followed by a three-digit running counter.
/// </remarks>
/// <param name="folder">The folder that is searched for input files.</param>
/// <param name="inputNames">The input names, forwarded to <c>GetOrFindFiles</c>.</param>
/// <param name="output">Log builder forwarded to <c>WeightDictionary.LoadFile</c>.</param>
/// <param name="searchPattern">The search pattern, forwarded to <c>GetOrFindFiles</c>.</param>
/// <returns>The result of merging the loaded dictionaries with the list-based <c>MergeWeightDictionaries</c> overload.</returns>
public static FeatureVectorDictionaryWithDimensions MergeWeightDictionaries(folderNode folder, String inputNames, ILogBuilder output, String searchPattern = "*_wt.xml")
{
    List<string> filepaths = folder.GetOrFindFiles(inputNames, searchPattern);

    List<WeightDictionary> results = new List<WeightDictionary>();

    Int32 c = 0;

    foreach (var fp in filepaths)
    {
        var lr = WeightDictionary.LoadFile(fp, output);

        // Keep the original name in the description before it is overwritten below.
        lr.description += "Source name: " + lr.name;

        lr.name = Path.GetFileNameWithoutExtension(fp) + c.ToString("D3");
        c++;

        results.Add(lr);
    }

    return MergeWeightDictionaries(results);
}
//public static DocumentSelectResult ToDocumentSelectResult(this FeatureVectorDictionaryWithDimensions input)
//{
//}

/// <summary>
/// Transforms multiple document selection results into FeatureVector dictionary
/// </summary>
/// <remarks>
/// Each result contributes one dimension, registered under the result's name. A result whose name is null/empty or
/// was already seen in this call is renamed in place to its three-digit position index, so the input objects may be
/// modified. For every entry ID, the score of the first entry seen per result is written into the dimension whose
/// index is reported by <c>output.dimensions.IndexOf</c> for that result's name.
/// </remarks>
/// <param name="documentSelections">The document selections.</param>
/// <returns>Dictionary holding one feature vector per distinct entry <c>AssignedID</c>.</returns>
public static FeatureVectorDictionaryWithDimensions TransformToFVDictionary(this IEnumerable<DocumentSelectResult> documentSelections)
{
    FeatureVectorDictionaryWithDimensions output = new FeatureVectorDictionaryWithDimensions();

    // entry ID -> (result name -> entry)
    Dictionary<String, Dictionary<String, DocumentSelectResultEntry>> entryIDvsResultSets = new Dictionary<String, Dictionary<String, DocumentSelectResultEntry>>();

    HashSet<String> existingNames = new HashSet<String>();

    Int32 i = 0;

    foreach (DocumentSelectResult res in documentSelections)
    {
        if (existingNames.Contains(res.name) || res.name.isNullOrEmpty())
        {
            res.name = i.ToString("D3");
        }
        existingNames.Add(res.name);

        output.dimensions.Add(res.name, res.description, Feature.Settings.FeatureVectorDimensionType.precompiledDocumentScore, res.name);

        foreach (DocumentSelectResultEntry entry in res.items)
        {
            // Single lookup for the per-entry map instead of ContainsKey followed by repeated indexer access.
            Dictionary<String, DocumentSelectResultEntry> entriesByResult;
            if (!entryIDvsResultSets.TryGetValue(entry.AssignedID, out entriesByResult))
            {
                entriesByResult = new Dictionary<String, DocumentSelectResultEntry>();
                entryIDvsResultSets.Add(entry.AssignedID, entriesByResult);
            }

            // First entry wins when a result lists the same ID more than once.
            if (!entriesByResult.ContainsKey(res.name))
            {
                entriesByResult.Add(res.name, entry);
            }
        }

        i++;
    }

    foreach (KeyValuePair<String, Dictionary<String, DocumentSelectResultEntry>> pair in entryIDvsResultSets)
    {
        var fv = output.GetOrAdd(pair.Key);

        foreach (KeyValuePair<String, DocumentSelectResultEntry> entries in pair.Value)
        {
            Int32 id = output.dimensions.IndexOf(entries.Key);
            if (id > -1)
            {
                fv.dimensions[id] = entries.Value.score;
            }
        }
    }

    return output;
}
/// <summary>
/// Merges the given weight dictionaries into one feature vector dictionary, with one dimension declared per input dictionary.
/// </summary>
/// <remarks>
/// Each dimension is named after its dictionary followed by the dictionary's zero-based position in
/// <paramref name="results"/>. The vectors come from <c>MergeDimensions(results)</c>, converted with <c>ToFeatureVectors</c>.
/// </remarks>
/// <param name="results">The weight dictionaries to merge.</param>
/// <returns>Dictionary containing the declared dimensions and the merged feature vectors.</returns>
public static FeatureVectorDictionaryWithDimensions MergeWeightDictionaries(this List<WeightDictionary> results)
{
    FeatureVectorDictionaryWithDimensions featureDict = new FeatureVectorDictionaryWithDimensions();

    Int32 i = 0;

    foreach (var dict in results)
    {
        featureDict.dimensions.Add(dict.name + i.ToString(), "Weights from [" + dict.name + "]", Feature.Settings.FeatureVectorDimensionType.precompiledDocumentScore, "WeightDictionary");

        // The counter was never advanced before, so every dimension name received the suffix "0".
        i++;
    }

    WeightDictionary output = MergeDimensions(results);

    var fvs = output.index.Values.ToFeatureVectors();

    foreach (var fv in fvs)
    {
        featureDict.Add(fv);
    }

    return featureDict;
}
/// <summary>
/// Prepares the specified context.
/// </summary>
/// <remarks>
/// Loads the score dictionary named by <c>dictionaryFile</c> from the context folder. When <c>useMachineLearning</c>
/// is set, it additionally loads (if not already set) and deploys the term weight model, restores the stored model
/// data or prepares the model from the context's space model, resolves the selected terms, deploys the feature
/// vector constructor, and trains the classifier on the context items that have an entry in the score dictionary.
/// </remarks>
/// <param name="context">The context.</param>
/// <param name="log">The log.</param>
/// <exception cref="ArgumentException">
/// Thrown when the score dictionary cannot be loaded, or when machine learning is enabled and the context has no space model.
/// </exception>
public override void Prepare(DocumentSelectResult context, ILogBuilder log)
{
    //context.folder.GetOrFindFiles("*", dictionaryFile + "*.xml");

    scoreDictionary = FeatureVectorDictionaryWithDimensions.LoadFile(context.folder, dictionaryFile, log); // WeightDictionary.LoadFile(WeightDictionary.GetDictionaryFilename(dictionaryFile, context.folder), log);

    if (scoreDictionary == null)
    {
        String msg = "Error: Failed to find score dictionary [" + dictionaryFile + "] in " + context.folder.path;
        throw new ArgumentException(msg, nameof(context));
    }

    if (useMachineLearning)
    {
        #region --------------- PREPARING TERM WEIGHT MODEL

        String p_m = FeatureWeightModel.GetModelDefinitionFilename(modelDefinitionFile, context.folder);
        String p_d = FeatureWeightModel.GetModelDataFilename(modelDefinitionFile, context.folder);

        if (TermWeightModel == null)
        {
            TermWeightModel = FeatureWeightModel.LoadModel(p_m, log);
        }

        TermWeightModel.Deploy(log);

        if (context.spaceModel == null)
        {
            String msg = "Error: TermWeight factor requires SpaceModel declared in the context for operation";
            throw new ArgumentException(msg, nameof(context));
        }

        if (File.Exists(p_d) && useStoredData)
        {
            WeightingModelDataSet data = objectSerialization.loadObjectFromXML<WeightingModelDataSet>(p_d, log);
            TermWeightModel.LoadModelDataSet(data, log);

            if (useSelectedFeatures)
            {
                SelectedTerms = WeightDictionary.LoadFile(WeightDictionary.GetDictionaryFilename(modelDefinitionFile + "_sf", context.folder), log);
            }
        }
        else
        {
            TermWeightModel.PrepareTheModel(context.spaceModel, log);
        }

        // SelectedTerms is only assigned above on the stored-data path; treat an unset value like an empty one
        // and fall back to the features selected in the context instead of dereferencing null.
        if (SelectedTerms == null || SelectedTerms.Count == 0)
        {
            SelectedTerms = context.selectedFeatures;
        }

        List<String> sel_tkns = new List<String>();

        if (SelectedTerms != null)
        {
            sel_tkns.AddRange(SelectedTerms.index.Values.Select(x => x.name));
        }

        // Last resort: no selected terms available, use the tokens from the space model.
        if (!sel_tkns.Any())
        {
            sel_tkns.AddRange(context.spaceModel.terms_known_label.GetTokens());
        }

        #endregion

        fvConstructor.Deploy(featureMethod.constructor, sel_tkns);

        classifier = featureMethod.classifierSettings.GetClassifier();

        sc_id = scoreDictionary.GetVectorsWithLabelID(null, criterion).ToNameVsLabelID();

        List<FeatureVectorWithLabelID> trainingSet = new List<FeatureVectorWithLabelID>();

        foreach (var item in context.items)
        {
            // Only items that have a label in the score dictionary take part in training.
            if (sc_id.ContainsKey(item.AssignedID))
            {
                WeightDictionary dc_vec = TermWeightModel.GetWeights(sel_tkns, item.spaceDocument, context.spaceModel);

                var n_vec = fvConstructor.ConstructFeatureVector(dc_vec, item.AssignedID);

                FeatureVectorWithLabelID id_vec = new FeatureVectorWithLabelID(n_vec, sc_id[item.AssignedID]);

                trainingSet.Add(id_vec);
            }
        }

        // Report the number of vectors actually handed to the classifier, not the number of labeled entries.
        log.log("Training [" + classifier.name + "] with [" + trainingSet.Count + "] feature vectors.");

        classifier.DoTraining(trainingSet, log);
    }
}