/// <summary>
        /// Loads document selection results from the ranking files found in <paramref name="folder"/> and merges
        /// them into a single feature vector dictionary via <c>TransformToFVDictionary</c>.
        /// </summary>
        /// <remarks>
        /// When a loaded result carries a name that an earlier loaded result already uses, it is renamed to the
        /// name (without extension) of the file it was loaded from.
        /// </remarks>
        /// <param name="folder">Folder queried through <c>GetOrFindFiles</c> for the ranking files.</param>
        /// <param name="inputNames">Input names forwarded to <c>GetOrFindFiles</c>.</param>
        /// <param name="output">Log builder passed to <c>DocumentSelectResult.LoadFromFile</c>.</param>
        /// <param name="searchPattern">Search pattern forwarded to <c>GetOrFindFiles</c>.</param>
        /// <returns>Feature vector dictionary built from all loaded results.</returns>
        public static FeatureVectorDictionaryWithDimensions MergeDSRankings(folderNode folder, String inputNames, ILogBuilder output, String searchPattern = "DS_*_ranking.xml")
        {
            List<string> filepaths = folder.GetOrFindFiles(inputNames, searchPattern);

            List<DocumentSelectResult> results = new List<DocumentSelectResult>();

            // Only membership is ever tested, so a set avoids a linear scan per loaded file.
            HashSet<string> existingNames = new HashSet<string>();

            foreach (string fp in filepaths)
            {
                var lr = DocumentSelectResult.LoadFromFile(fp, output);

                if (existingNames.Contains(lr.name))
                {
                    // Name clash with an earlier result: fall back to the source file name.
                    lr.name = Path.GetFileNameWithoutExtension(fp);
                }
                existingNames.Add(lr.name);

                results.Add(lr);
            }

            return DocumentRankingExtensions.TransformToFVDictionary(results);
        }
Example #2
0
        /// <summary>Builds and saves, for every fold, a table of merged document rankings and a table of merged weight dictionaries.</summary>
        /// <remarks>
        /// <para>For each fold procedure, the files in the fold notes folder matching <c>*_ranking.xml</c> and
        /// <c>*_wt.xml</c> are merged, converted into data tables named "Rankings" and "WeightTable", and saved
        /// through the fold notes.</para>
        /// </remarks>
        /// <param name="word">Not read by this operation.</param>
        /// <param name="steps">Not read by this operation.</param>
        /// <param name="debug">Not read by this operation.</param>
        /// <seealso cref="aceOperationSetExecutorBase"/>
        public void aceOperation_runReportOnResources(
            [Description("--")] String word   = "word",
            [Description("--")] Int32 steps   = 5,
            [Description("--")] Boolean debug = true)
        {
            // Work on a copy of the document selection setup.
            SetupDocumentSelection selectionSetup = docSelection.data.CloneViaXML();

            var foldProcedures = new ProceduralFolderFor<ProcedureCreateScoreSet, SetupDocumentSelection, OperationContext, ExperimentModelExecutionContext>(
                mainContext.folds,
                selectionSetup,
                mainContext.notes,
                parent);

            foreach (var foldProcedure in foldProcedures)
            {
                // NOTE(review): Close() below is skipped if anything in between throws - confirm whether try/finally is wanted.
                foldProcedure.Open();

                // Rankings report
                var mergedRankings = DocumentRankingExtensions.MergeDSRankings(foldProcedure.fold_notes.folder, "", output, "*_ranking.xml");
                var rankingsTable  = mergedRankings.Values.MakeTable(mergedRankings.dimensions, "Rankings", foldProcedure.description, mainContext.truthTable.label_index);
                foldProcedure.fold_notes.SaveDataTable(rankingsTable);

                // Weight dictionaries report
                var mergedWeights = WeightDictionaryTools.MergeWeightDictionaries(foldProcedure.fold_notes.folder, "", output, "*_wt.xml");
                var weightsTable  = mergedWeights.Values.MakeTable(mergedWeights.dimensions, "WeightTable", foldProcedure.description);
                foldProcedure.fold_notes.SaveDataTable(weightsTable);

                foldProcedure.Close();
            }
        }
Example #3
0
        /// <summary>
        /// Converts classification test results into a feature vector dictionary with a single "Correct"
        /// dimension that holds the score of each classification outcome.
        /// </summary>
        /// <remarks>
        /// For every test item the predicted label is read from <c>labels_without_unknown</c> (by label ID) and
        /// the true label from <c>siteToLabel</c> (by the name of the item's vector). Dimension 0 of the created
        /// vector receives <paramref name="correctScore"/> when both match, otherwise <paramref name="incorrectScore"/>.
        /// </remarks>
        /// <param name="testResults">Labelled feature vectors produced by the test.</param>
        /// <param name="_testName">Test name; used in the dimension description and passed as the last argument when the dimension is declared.</param>
        /// <param name="logger">Not read by this method.</param>
        /// <param name="correctScore">Score assigned for a correct classification.</param>
        /// <param name="incorrectScore">Score assigned for an incorrect classification.</param>
        /// <returns>Dictionary with one feature vector per test item.</returns>
        public FeatureVectorDictionaryWithDimensions GetEvaluationAsFeatureVectorDictionary(List<FeatureVectorWithLabelID> testResults, String _testName, ILogBuilder logger, Double correctScore = 1, Double incorrectScore = -1)
        {
            var evaluation = new FeatureVectorDictionaryWithDimensions();

            String dimensionDescription = "Classification is performed correctly in [" + _testName + "] test";
            evaluation.dimensions.Add("Correct", dimensionDescription, Feature.Settings.FeatureVectorDimensionType.precompiledDocumentScore, _testName);

            foreach (FeatureVectorWithLabelID item in testResults)
            {
                String predictedLabel = labels_without_unknown[item.labelID];
                String trueLabel      = siteToLabel[item.vector.name];

                var scoreVector = evaluation.Create(item.name);
                scoreVector.dimensions[0] = (predictedLabel == trueLabel) ? correctScore : incorrectScore;

                evaluation.Add(scoreVector);
            }

            return evaluation;
        }
Example #4
0
        /// <summary>
        /// Loads weight dictionaries from the files found in <paramref name="folder"/> and merges them into a
        /// single feature vector dictionary.
        /// </summary>
        /// <remarks>
        /// Each loaded dictionary gets "Source name: " plus its original name appended to its description and is
        /// then renamed to the source file name (without extension) followed by a three-digit load index.
        /// </remarks>
        /// <param name="folder">Folder queried through <c>GetOrFindFiles</c> for the weight dictionary files.</param>
        /// <param name="inputNames">Input names forwarded to <c>GetOrFindFiles</c>.</param>
        /// <param name="output">Log builder passed to <c>WeightDictionary.LoadFile</c>.</param>
        /// <param name="searchPattern">Search pattern forwarded to <c>GetOrFindFiles</c>.</param>
        /// <returns>Feature vector dictionary produced by merging all loaded weight dictionaries.</returns>
        public static FeatureVectorDictionaryWithDimensions MergeWeightDictionaries(folderNode folder, String inputNames, ILogBuilder output, String searchPattern = "*_wt.xml")
        {
            List<string> filepaths = folder.GetOrFindFiles(inputNames, searchPattern);

            List<WeightDictionary> results = new List<WeightDictionary>();

            Int32 c = 0;

            foreach (string fp in filepaths)
            {
                var lr = WeightDictionary.LoadFile(fp, output);

                // Keep the original name in the description before it is overwritten below.
                lr.description += "Source name: " + lr.name;

                String fn = Path.GetFileNameWithoutExtension(fp);
                lr.name = fn + c.ToString("D3");
                c++;

                results.Add(lr);
            }

            FeatureVectorDictionaryWithDimensions featureDict = MergeWeightDictionaries(results);

            return featureDict;
        }
        //public static DocumentSelectResult ToDocumentSelectResult(this FeatureVectorDictionaryWithDimensions input)
        //{

        //}



        /// <summary>
        /// Transforms multiple document selection results into one feature vector dictionary in which every
        /// result set is a dimension and every entry (by <c>AssignedID</c>) is a feature vector.
        /// </summary>
        /// <remarks>
        /// A result whose name is null/empty or already used is renamed (the input object is modified) to its
        /// three-digit position in the sequence; if that name is taken as well, a numeric suffix is appended
        /// until the name is unique. When a result set contains several entries with the same
        /// <c>AssignedID</c>, only the first one is used.
        /// </remarks>
        /// <param name="documentSelections">The document selection results to combine.</param>
        /// <returns>Dictionary with one dimension per result set, holding the entry scores.</returns>
        public static FeatureVectorDictionaryWithDimensions TransformToFVDictionary(this IEnumerable<DocumentSelectResult> documentSelections)
        {
            FeatureVectorDictionaryWithDimensions output = new FeatureVectorDictionaryWithDimensions();

            // entry AssignedID -> (result set name -> entry coming from that result set)
            Dictionary<String, Dictionary<String, DocumentSelectResultEntry>> entryIDvsResultSets = new Dictionary<String, Dictionary<String, DocumentSelectResultEntry>>();

            HashSet<String> existingNames = new HashSet<String>();

            Int32 i = 0;

            foreach (DocumentSelectResult res in documentSelections)
            {
                if (existingNames.Contains(res.name) || res.name.isNullOrEmpty())
                {
                    // The positional name may itself be taken (e.g. an earlier result explicitly named "001"),
                    // which would produce a duplicate dimension and drop this result's entries.
                    String fallbackName = i.ToString("D3");
                    Int32 attempt = 0;
                    while (existingNames.Contains(fallbackName))
                    {
                        attempt++;
                        fallbackName = i.ToString("D3") + "_" + attempt.ToString();
                    }
                    res.name = fallbackName;
                }
                existingNames.Add(res.name);

                String resultName = res.name;

                output.dimensions.Add(resultName, res.description, Feature.Settings.FeatureVectorDimensionType.precompiledDocumentScore, resultName);

                foreach (DocumentSelectResultEntry entry in res.items)
                {
                    Dictionary<String, DocumentSelectResultEntry> entriesByResult;
                    if (!entryIDvsResultSets.TryGetValue(entry.AssignedID, out entriesByResult))
                    {
                        entriesByResult = new Dictionary<String, DocumentSelectResultEntry>();
                        entryIDvsResultSets.Add(entry.AssignedID, entriesByResult);
                    }

                    // First entry wins when a result set repeats an AssignedID.
                    if (!entriesByResult.ContainsKey(resultName))
                    {
                        entriesByResult.Add(resultName, entry);
                    }
                }
                i++;
            }

            foreach (KeyValuePair<String, Dictionary<String, DocumentSelectResultEntry>> pair in entryIDvsResultSets)
            {
                var fv = output.GetOrAdd(pair.Key);

                foreach (KeyValuePair<String, DocumentSelectResultEntry> entries in pair.Value)
                {
                    Int32 id = output.dimensions.IndexOf(entries.Key);

                    if (id > -1)
                    {
                        fv.dimensions[id] = entries.Value.score;
                    }
                }
            }

            return output;
        }
Example #6
0
        /// <summary>
        /// Merges the given weight dictionaries into a single feature vector dictionary, declaring one
        /// dimension per input dictionary.
        /// </summary>
        /// <remarks>
        /// Each dimension is named after its dictionary with the dictionary's position in
        /// <paramref name="results"/> appended. The dictionaries are combined with <c>MergeDimensions</c> and the
        /// merged index entries are converted to feature vectors.
        /// </remarks>
        /// <param name="results">Weight dictionaries to merge.</param>
        /// <returns>Feature vector dictionary holding the merged entries.</returns>
        public static FeatureVectorDictionaryWithDimensions MergeWeightDictionaries(this List<WeightDictionary> results)
        {
            FeatureVectorDictionaryWithDimensions featureDict = new FeatureVectorDictionaryWithDimensions();
            Int32 i = 0;

            foreach (var dict in results)
            {
                featureDict.dimensions.Add(dict.name + i.ToString(), "Weights from [" + dict.name + "]", Feature.Settings.FeatureVectorDimensionType.precompiledDocumentScore, "WeightDictionary");

                // Advance the position suffix; previously it stayed at 0 for every dictionary.
                i++;
            }

            WeightDictionary output = MergeDimensions(results);

            var fvs = output.index.Values.ToFeatureVectors();

            foreach (var fv in fvs)
            {
                featureDict.Add(fv);
            }

            return featureDict;
        }
        /// <summary>
        /// Loads the score dictionary and, when machine learning is enabled, prepares the term weight model and
        /// the feature vector constructor, then trains the classifier on the context items that have a label
        /// in the score dictionary.
        /// </summary>
        /// <param name="context">Document selection context; its <c>folder</c>, <c>spaceModel</c>, <c>selectedFeatures</c> and <c>items</c> are read.</param>
        /// <param name="log">Log builder passed to the loaders and used for the training message.</param>
        /// <exception cref="ArgumentException">
        /// The score dictionary could not be loaded from the context folder, or machine learning is enabled and
        /// <paramref name="context"/> has no space model.
        /// </exception>
        public override void Prepare(DocumentSelectResult context, ILogBuilder log)
        {
            scoreDictionary = FeatureVectorDictionaryWithDimensions.LoadFile(context.folder, dictionaryFile, log);

            if (scoreDictionary == null)
            {
                String msg = "Error: Failed to find score dictionary [" + dictionaryFile + "] in " + context.folder.path;
                throw new ArgumentException(msg, nameof(context));
            }

            if (useMachineLearning)
            {
                #region --------------- PREPARING TERM WEIGHT MODEL


                String p_m = FeatureWeightModel.GetModelDefinitionFilename(modelDefinitionFile, context.folder);
                String p_d = FeatureWeightModel.GetModelDataFilename(modelDefinitionFile, context.folder);


                // Load the model definition only when no model instance is set yet.
                if (TermWeightModel == null)
                {
                    TermWeightModel = FeatureWeightModel.LoadModel(p_m, log);
                }


                TermWeightModel.Deploy(log);

                if (context.spaceModel == null)
                {
                    String msg = "Error: TermWeight factor requires SpaceModel declared in the context for operation";
                    throw new ArgumentException(msg, nameof(context));
                }



                // Reuse stored model data when allowed and present; otherwise prepare the model from the space model.
                if (File.Exists(p_d) && useStoredData)
                {
                    WeightingModelDataSet data = objectSerialization.loadObjectFromXML<WeightingModelDataSet>(p_d, log);
                    TermWeightModel.LoadModelDataSet(data, log);

                    if (useSelectedFeatures)
                    {
                        SelectedTerms = WeightDictionary.LoadFile(WeightDictionary.GetDictionaryFilename(modelDefinitionFile + "_sf", context.folder), log);
                    }
                }
                else
                {
                    TermWeightModel.PrepareTheModel(context.spaceModel, log);
                }

                // No selected terms available: fall back to the features selected in the context.
                if (SelectedTerms.Count == 0)
                {
                    SelectedTerms = context.selectedFeatures;
                }
                List<String> sel_tkns = new List<String>();

                sel_tkns.AddRange(SelectedTerms.index.Values.Select(x => x.name));

                // Still nothing selected: use the tokens of the space model's known-label terms.
                if (!sel_tkns.Any())
                {
                    sel_tkns.AddRange(context.spaceModel.terms_known_label.GetTokens());
                }


                #endregion

                fvConstructor.Deploy(featureMethod.constructor, sel_tkns);



                classifier = featureMethod.classifierSettings.GetClassifier();

                // Name -> label ID map derived from the score dictionary.
                sc_id = scoreDictionary.GetVectorsWithLabelID(null, criterion).ToNameVsLabelID();


                List<FeatureVectorWithLabelID> trainingSet = new List<FeatureVectorWithLabelID>();
                foreach (var item in context.items)
                {
                    // Only items with a label in the score dictionary take part in training.
                    if (sc_id.ContainsKey(item.AssignedID))
                    {
                        WeightDictionary dc_vec = TermWeightModel.GetWeights(sel_tkns, item.spaceDocument, context.spaceModel);


                        var n_vec = fvConstructor.ConstructFeatureVector(dc_vec, item.AssignedID);

                        FeatureVectorWithLabelID id_vec = new FeatureVectorWithLabelID(n_vec, sc_id[item.AssignedID]);

                        trainingSet.Add(id_vec);
                    }
                }


                // Report the number of vectors actually handed to the classifier, not the size of the label map.
                log.log("Training [" + classifier.name + "] with [" + trainingSet.Count + "] feature vectors.");
                classifier.DoTraining(trainingSet, log);
            }
        }