Example #1
        //public void Deploy(FeatureVectorDictionary dataset, ILogBuilder logger)
        //{
        //    Deploy(dataset.GetVectorsWithLabelID(d))
        //}

        /// <summary>
        /// Builds a labeled dataset from the feature space: each document vector is paired with the index of its
        /// associated label, or with the index of <see cref="SpaceLabel.UNKNOWN"/> when it has no association,
        /// and the resulting list is passed to the dataset-based Deploy overload.
        /// </summary>
        /// <param name="space">The feature space whose document vectors are labeled.</param>
        /// <param name="labels">The label names; an index into this list becomes the label ID.</param>
        /// <param name="logger">The logger.</param>
        public void Deploy(FeatureSpace space, List <String> labels, ILogBuilder logger)
        {
            //List<String> labels = space.labelToDocumentAssociations.GetAllDistinctNames(true);

            List <FeatureVectorWithLabelID> dataset = new List <FeatureVectorWithLabelID>();

            foreach (FeatureVector vec in space.documents)
            {
                var associated = space.labelToDocumentAssociations.GetAllLinked(vec);

                Int32 lbi = -1;


                FeatureVectorWithLabelID fvl = null;

                if (associated.Any())
                {
                    lbi = labels.IndexOf(associated.First().name);
                }
                else
                {
                    lbi = labels.IndexOf(SpaceLabel.UNKNOWN);
                }

                fvl = new FeatureVectorWithLabelID(vec, lbi);
                dataset.Add(fvl);
            }

            Deploy(dataset, logger, labels);
        }
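
The later examples call this overload with the label names taken from the space model; a call sketch reusing the variable names from those examples:

        // call pattern as it appears in the PerformClassification examples below;
        // context.featureSpace and context.spaceModel are assumed to come from the surrounding experiment
        truthTable.Deploy(context.featureSpace,
                          context.spaceModel.labels.Select(x => x.name).ToList(),
                          log);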
        /// <summary>
        /// Computes the score of the given entry from its feature vector, according to the active
        /// <c>ScoreComputationModeEnum</c> flags (offset, variance, distance, optionally inverted).
        /// </summary>
        public override double Score(DocumentSelectResultEntry entry, DocumentSelectResult context, ILogBuilder log)
        {
            FeatureVectorWithLabelID fv = vectorDictionary.Get(entry.DomainID, entry.AssignedID);

            if (fv == null)
            {
                log.log("Can't find vector dictionary entry for [" + entry.DomainID + "]>[" + entry.AssignedID + "]");
                return(0);
            }
            Double sc = 0;

            if (computation.HasFlag(ScoreComputationModeEnum.offset))
            {
                sc = fv.CompressByTrueDimension(fv.labelID);
            }
            else if (computation.HasFlag(ScoreComputationModeEnum.variance))
            {
                sc = fv.dimensions.GetVarianceCoefficient();
            }
            else if (computation.HasFlag(ScoreComputationModeEnum.distance))
            {
                sc = fv.CompressNumericVector(imbSCI.Core.enums.operation.max);
            }
            else
            {
                sc = fv.dimensions[0];
            }

            if (computation.HasFlag(ScoreComputationModeEnum.inverse))
            {
                sc = -sc;
            }

            // Double.NaN never compares equal to itself, so the check must use Double.IsNaN
            if (Double.IsNaN(sc))
            {
                sc = 0;
            }

            return(sc);
        }
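
Since the branches above test computation.HasFlag(...), ScoreComputationModeEnum is presumably a [Flags] enum; a hypothetical configuration sketch (the surrounding variable names are assumptions):

        // hypothetical: rank entries by inverted variance of their vector dimensions
        computation = ScoreComputationModeEnum.variance | ScoreComputationModeEnum.inverse;

        // Score() then returns the negated variance coefficient, so more uniform vectors score higher
        Double sc_example = Score(entry, context, log);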
Example #3
        /// <summary>
        /// Trains the classifier on the training set and classifies the test set, deploying an in-fold truth table when none is supplied.
        /// </summary>
        public void PerformClassification(OperationContext context, ExperimentTruthTable truthTable, ClassificationDatasetSeparationEnum distributionRule, ILogBuilder log)
        {
            log.log("Performing classification");

            if (truthTable == null)
            {
                truthTable = new ExperimentTruthTable();
                notes.log(":: DEPLOYING IN-FOLD TRUTH TABLE ::");
                truthTable.Deploy(context.featureSpace, context.spaceModel.labels.Select(x => x.name).ToList(), log);
            }


            DistributeTrainingAndTestSets(distributionRule, truthTable, context.featureSpace, log, context.testSet, context.trainingSet);

            if (!context.trainingSet.Any())
            {
                notes.log("TRAINING SET EMPTY ---- APPLYING 1:1 EXPERIMENT SHEME: training and test set are the same");
            }
            else
            {
                notes.log("Training [" + classifier.name + "] with [" + context.trainingSet.Count + "] feature vectors.");
                classifier.DoTraining(context.trainingSet, log);

                log.log("Training [" + classifier.name + "] completed.");


                notes.log("Testing [" + classifier.name + "] with [" + context.testSet.Count + "] feature vectors.");

                context.testResults = new List <FeatureVectorWithLabelID>();

                var ts = context.testSet.Select(x => x.vector);

                foreach (FeatureVector fv in ts)
                {
                    Int32 result = classifier.DoSelect(fv, log);
                    FeatureVectorWithLabelID fvl = new FeatureVectorWithLabelID(fv, result);
                    context.testResults.Add(fvl);
                }


                log.log("Testing [" + classifier.name + "] completed.");
            }
        }
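
A call sketch for the method above: passing null for truthTable triggers the in-fold truth-table deployment, and the distribution rule decides how labeled and unlabeled vectors are split (the enum members are taken from the later examples; combining them as flags is an assumption):

        // hypothetical invocation: train on labeled vectors, test on the unlabeled remainder
        PerformClassification(context, null,
            ClassificationDatasetSeparationEnum.TrainingLabeled | ClassificationDatasetSeparationEnum.TestUnlabeled,
            log);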
Example #4
        /// <summary>
        /// Executes the plane method, invoking contained functions according to the settings
        /// </summary>
        /// <param name="inputContext">The input context - related to this plane.</param>
        /// <param name="generalContext">General execution context, attached to the <see cref="T:imbNLP.Toolkit.Planes.PlanesMethodDesign" /></param>
        /// <param name="logger">The logger.</param>
        /// <returns>
        /// Retur
        /// </returns>
        public IPlaneContext ExecutePlaneMethod(IPlaneContext inputContext, ExperimentModelExecutionContext generalContext, ILogBuilder logger)
        {
            notes.logStartPhase("[4] Feature Plane - execution", "");

            IFeaturePlaneContext context = inputContext as IFeaturePlaneContext;



            foreach (FeatureVector vec in context.featureSpace.documents)
            {
                var associated = context.featureSpace.labelToDocumentAssociations.GetAllLinked(vec);
                if (associated.Any())
                {
                    Int32 lbi = generalContext.truthTable.labels_without_unknown.IndexOf(associated.First().name);

                    FeatureVectorWithLabelID fvl = new FeatureVectorWithLabelID(vec, lbi);
                    context.trainingSet.Add(fvl);
                }
                else
                {
                    context.testSet.Add(vec);
                }
            }

            if (!context.testSet.Any())
            {
                notes.log("TEST SET IS EMPTY ---- APPLYING 1:1 EXPERIMENT SHEME: training and test set are the same");

                context.trainingSet.ForEach(x => context.testSet.Add(x.vector));
            }


            if (!context.trainingSet.Any())
            {
                notes.log("TRAINING SET EMPTY ---- APPLYING 1:1 EXPERIMENT SCHEME: training and test set are the same");
            }
            else
            {
                notes.log("Training [" + classifier.name + "] with [" + context.trainingSet.Count + "] feature vectors.");
                classifier.DoTraining(context.trainingSet, logger);

                notes.log("Testing [" + classifier.name + "] with [" + context.testSet.Count + "] feature vectors.");

                context.testResults = new List <FeatureVectorWithLabelID>();
                foreach (FeatureVector fv in context.testSet)
                {
                    Int32 result = classifier.DoSelect(fv, logger);
                    FeatureVectorWithLabelID fvl = new FeatureVectorWithLabelID(fv, result);
                    context.testResults.Add(fvl);
                }



                /*
                 * Dictionary<string, List<FeatureVectorWithLabelID>> byCategory = generalContext.truthTable.GroupByTrueCategory(context.testResults);
                 * objectTable<classificationReport> tbl = new objectTable<classificationReport>(nameof(classificationReport.Name), "inclass_" + generalContext.runName);
                 * classificationReport macroAverage = new classificationReport("AVG-" + generalContext.runName);
                 * foreach (KeyValuePair<string, List<FeatureVectorWithLabelID>> pair in byCategory)
                 * {
                 *  var cReport = generalContext.EvaluateTestResults(pair.Value, pair.Key + "-" + generalContext.runName, logger);
                 *
                 *  cReport.Classifier = classifier.name;
                 *  cReport.Comment = "Tr/Ts [" + context.trainingSet.Count + "]:[" + context.testSet.Count + "]";
                 *  String path = notes.folder_classification.pathFor(pair.Key + "_result.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized evaluation result within category [" + pair.Key + "]", true);
                 *
                 *  macroAverage.AddValues(cReport);
                 *
                 *  tbl.Add(cReport);
                 * }
                 * //  macroAverage.DivideValues(byCategory.Keys.Count);
                 *
                 * tbl.Add(macroAverage);
                 *
                 * notes.SaveDataTable(tbl.GetDataTable(), notes.folder_classification);
                 */
            }



            notes.logEndPhase();

            return(context);
        }
        /// <summary>
        /// Prepares the specified context.
        /// </summary>
        /// <param name="context">The context.</param>
        /// <param name="log">The log.</param>
        /// <exception cref="ArgumentException">context</exception>
        public override void Prepare(DocumentSelectResult context, ILogBuilder log)
        {
            //context.folder.GetOrFindFiles("*", dictionaryFile + "*.xml");

            scoreDictionary = FeatureVectorDictionaryWithDimensions.LoadFile(context.folder, dictionaryFile, log); // WeightDictionary.LoadFile(WeightDictionary.GetDictionaryFilename(dictionaryFile, context.folder), log);

            if (scoreDictionary == null)
            {
                String msg = "Error: Failed to find score dictionary [" + dictionaryFile + "] in " + context.folder.path;
                throw new ArgumentException(msg, nameof(context));
            }

            if (useMachineLearning)
            {
                #region --------------- PREPARING TERM WEIGHT MODEL


                String p_m = FeatureWeightModel.GetModelDefinitionFilename(modelDefinitionFile, context.folder);
                String p_d = FeatureWeightModel.GetModelDataFilename(modelDefinitionFile, context.folder);


                if (TermWeightModel == null)
                {
                    TermWeightModel = FeatureWeightModel.LoadModel(p_m, log);
                }


                TermWeightModel.Deploy(log);

                if (context.spaceModel == null)
                {
                    String msg = "Error: TermWeight factor requires SpaceModel declared in the context for operation";
                    throw new ArgumentException(msg, nameof(context));
                }



                if (File.Exists(p_d) && useStoredData)
                {
                    WeightingModelDataSet data = objectSerialization.loadObjectFromXML <WeightingModelDataSet>(p_d, log);
                    TermWeightModel.LoadModelDataSet(data, log);

                    if (useSelectedFeatures)
                    {
                        SelectedTerms = WeightDictionary.LoadFile(WeightDictionary.GetDictionaryFilename(modelDefinitionFile + "_sf", context.folder), log);
                    }
                }
                else
                {
                    TermWeightModel.PrepareTheModel(context.spaceModel, log);
                }

                if (SelectedTerms.Count == 0)
                {
                    SelectedTerms = context.selectedFeatures;
                }
                List <String> sel_tkns = new List <String>();

                sel_tkns.AddRange(SelectedTerms.index.Values.Select(x => x.name));

                if (!sel_tkns.Any())
                {
                    sel_tkns.AddRange(context.spaceModel.terms_known_label.GetTokens());
                }


                #endregion

                fvConstructor.Deploy(featureMethod.constructor, sel_tkns);



                classifier = featureMethod.classifierSettings.GetClassifier();

                sc_id = scoreDictionary.GetVectorsWithLabelID(null, criterion).ToNameVsLabelID();


                List <FeatureVectorWithLabelID> trainingSet = new List <FeatureVectorWithLabelID>();
                foreach (var item in context.items)
                {
                    if (sc_id.ContainsKey(item.AssignedID))
                    {
                        WeightDictionary dc_vec = TermWeightModel.GetWeights(sel_tkns, item.spaceDocument, context.spaceModel);


                        var n_vec = fvConstructor.ConstructFeatureVector(dc_vec, item.AssignedID);

                        FeatureVectorWithLabelID id_vec = new FeatureVectorWithLabelID(n_vec, sc_id[item.AssignedID]);

                        trainingSet.Add(id_vec);
                    }
                }


                log.log("Training [" + classifier.name + "] with [" + sc_id.Count + "] feature vectors.");
                classifier.DoTraining(trainingSet, log);
            }
        }
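
Once Prepare() has trained the classifier, a later scoring pass would presumably build a document vector the same way and ask the classifier for a label index; a sketch reusing the names from the method above (the flow outside Prepare is an assumption):

        // sketch: construct the vector for one document and let the trained classifier pick a label index
        WeightDictionary dc_vec = TermWeightModel.GetWeights(sel_tkns, item.spaceDocument, context.spaceModel);
        var n_vec = fvConstructor.ConstructFeatureVector(dc_vec, item.AssignedID);
        Int32 predictedLabel = classifier.DoSelect(n_vec, log);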
Example #6
        public void PerformClassification(OperationContext context, ExperimentTruthTable truthTable, ClassificationDatasetSeparationEnum distributionRule, ILogBuilder log)
        {
            log.log("Performing classification");

            if (truthTable == null)
            {
                truthTable = new ExperimentTruthTable();
                notes.log(":: DEPLOYING IN-FOLD TRUTH TABLE ::");
                log.log(":: DEPLOYING IN-FOLD TRUTH TABLE ::");
                truthTable.Deploy(context.featureSpace, context.spaceModel.labels.Select(x => x.name).ToList(), log);
            }


            DistributeTrainingAndTestSets(distributionRule, truthTable, context.featureSpace, log, context.testSet, context.trainingSet);

            if (!context.trainingSet.Any())
            {
                notes.log("TRAINING SET EMPTY ---- APPLYING 1:1 EXPERIMENT SHEME: training and test set are the same");
            }
            else
            {
                notes.log("Training [" + classifier.name + "] with [" + context.trainingSet.Count + "] feature vectors.");
                classifier.DoTraining(context.trainingSet, log);

                log.log("Training [" + classifier.name + "] completed.");


                notes.log("Testing [" + classifier.name + "] with [" + context.testSet.Count + "] feature vectors.");

                context.testResults = new List <FeatureVectorWithLabelID>();

                var ts = context.testSet.Select(x => x.vector);

                List <Int32> distinctResults = new List <int>();


                foreach (FeatureVector fv in ts)
                {
                    Int32 result = classifier.DoSelect(fv, log);
                    if (!distinctResults.Contains(result))
                    {
                        distinctResults.Add(result);
                    }
                    FeatureVectorWithLabelID fvl = new FeatureVectorWithLabelID(fv, result);
                    context.testResults.Add(fvl);
                }

                if (distinctResults.Count < truthTable.labels_without_unknown.Count)
                {
                    List <String> no_match_labels = truthTable.labels_without_unknown.ToList();
                    foreach (Int32 d in distinctResults)
                    {
                        no_match_labels.Remove(truthTable.labels_without_unknown[d]);
                    }

                    log.log("WARNING --- [" + classifier.name + "] ONLY [" + distinctResults.Count + "] of [" + truthTable.labels_without_unknown.Count + "] were assigned by the classifier");
                    foreach (String l in no_match_labels)
                    {
                        log.log("Class [" + l + "] received no assigment");
                    }

                    foreach (var v in context.testSet)
                    {
                        var dist = v.GetDistinctValuesAtVector();
                        if (dist.Count < 2)
                        {
                            log.log("Test vector [" + v.name + "] has [" + dist.Count + "] distinct values at [" + v.dimensions.Length + "] dimensions!");
                        }
                    }

                    foreach (var v in context.trainingSet)
                    {
                        var dist = v.GetDistinctValuesAtVector();
                        if (dist.Count < 2)
                        {
                            log.log("Training vector [" + v.name + "] has [" + dist.Count + "] distinct values at [" + v.dimensions.Length + "] dimensions!");
                        }
                    }
                }

                log.log("Testing [" + classifier.name + "] completed.");
            }
        }
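
The distinct-result bookkeeping in the loop above can be written more compactly with LINQ; a behavior-equivalent sketch (assuming System.Linq is in scope):

        // label indices that the classifier actually assigned at least once
        List<Int32> distinct = context.testResults.Select(r => r.labelID).Distinct().ToList();

        // label names that received no assignment at all
        List<String> unassigned = truthTable.labels_without_unknown
            .Where((label, index) => !distinct.Contains(index))
            .ToList();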
Example #7
        //public ExperimentTruthTable ConstructTruthTable(FeatureSpace space, ILogBuilder log)
        //{

        //    ExperimentTruthTable output = new ExperimentTruthTable();

        //    output.Deploy(space, )

        //    List<String> labels = space.labelToDocumentAssociations.GetAllDistinctNames(true);

        //    List<FeatureVectorWithLabelID> dataset = new List<FeatureVectorWithLabelID>();

        //    foreach (FeatureVector vec in space.documents)
        //    {
        //        var associated = space.labelToDocumentAssociations.GetAllLinked(vec);

        //        Int32 lbi = -1;


        //        FeatureVectorWithLabelID fvl = null;

        //        if (associated.Any())
        //        {
        //            lbi = labels.IndexOf(associated.First().name);
        //        }
        //        else
        //        {
        //            lbi = labels.IndexOf(SpaceLabel.UNKNOWN);
        //        }

        //        fvl = new FeatureVectorWithLabelID(vec, lbi);
        //        dataset.Add(fvl);
        //    }

        //    output.Deploy(dataset, log);
        //    return output;

        //}


        /// <summary>
        /// Splits the document vectors of the feature space into training and test pools: vectors with a known label
        /// go to the labeled pool, vectors without association to the unlabeled pool, and the pools are then copied
        /// into <paramref name="trainingSet"/> and <paramref name="testSet"/> according to the distribution rule flags.
        /// </summary>
        public void DistributeTrainingAndTestSets(ClassificationDatasetSeparationEnum distributionRule, ExperimentTruthTable truthTable, FeatureSpace featureSpace, ILogBuilder log, List <FeatureVectorWithLabelID> testSet, List <FeatureVectorWithLabelID> trainingSet)
        {
            log.log("Spliting data [" + distributionRule.ToString() + "]");

            List <FeatureVectorWithLabelID> __testSet     = new List <FeatureVectorWithLabelID>();
            List <FeatureVectorWithLabelID> __trainingSet = new List <FeatureVectorWithLabelID>();


            foreach (FeatureVector vec in featureSpace.documents)
            {
                var associated = featureSpace.labelToDocumentAssociations.GetAllLinked(vec);

                Int32 lbi = -1;


                FeatureVectorWithLabelID fvl = null;

                if (associated.Any())
                {
                    lbi = truthTable.labels_without_unknown.IndexOf(associated.First().name);
                }

                fvl = new FeatureVectorWithLabelID(vec, lbi);

                if (lbi == -1)
                {
                    __testSet.Add(fvl);
                }
                else
                {
                    __trainingSet.Add(fvl);
                }
            }



            if (!__testSet.Any())
            {
                notes.log("TEST SET IS EMPTY ---- APPLYING 1:1 EXPERIMENT SHEME: training and test set are the same");
                __trainingSet.ForEach(x => __testSet.Add(x));
            }
            else
            {
                if (distributionRule.HasFlag(ClassificationDatasetSeparationEnum.TestLabeled))
                {
                    testSet.AddRange(__trainingSet, true);
                }

                if (distributionRule.HasFlag(ClassificationDatasetSeparationEnum.TestUnlabeled))
                {
                    testSet.AddRange(__testSet, true);
                }

                if (distributionRule.HasFlag(ClassificationDatasetSeparationEnum.TrainingLabeled))
                {
                    trainingSet.AddRange(__trainingSet, true);
                }

                if (distributionRule.HasFlag(ClassificationDatasetSeparationEnum.TrainingUnlabeled))
                {
                    trainingSet.AddRange(__testSet, true);
                }
            }


            log.log("Training [" + trainingSet.Count + "] - Testing [" + testSet.Count + "]");

            //switch (distributionRule)
            //{
            //    case ClassificationDatasetSeparationEnum.TrainingAll_TestAll:
            //
            //        break;
            //    case ClassificationDatasetSeparationEnum.TrainingAll_TestUnlabeled:

            //        break;
            //    case ClassificationDatasetSeparationEnum.TrainingLabeled_TestAll:
            //        trainingSet.ForEach(x => testSet.Add(x.vector));
            //        break;
            //    case ClassificationDatasetSeparationEnum.TrainingLabeled_TestUnlabeled:
            //        // just fine
            //        break;
            //}
        }
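
A hypothetical call that pairs with the HasFlag checks above; whether the enum members can actually be combined as flags is an assumption:

        // hypothetical: use the labeled vectors both for training and for testing (1:1 scheme)
        DistributeTrainingAndTestSets(
            ClassificationDatasetSeparationEnum.TrainingLabeled | ClassificationDatasetSeparationEnum.TestLabeled,
            truthTable, featureSpace, log, testSet, trainingSet);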
Example #8
        /// <summary>
        /// Generates a data table from feature vectors
        /// </summary>
        /// <typeparam name="T"></typeparam>
        /// <param name="_vectors">The vectors.</param>
        /// <param name="dimensions">The dimensions.</param>
        /// <param name="name">The name.</param>
        /// <param name="description">The description.</param>
        /// <param name="labels">The labels.</param>
        /// <returns></returns>
        public static DataTable MakeTable <T>(this IEnumerable <T> _vectors, dimensionSpecificationSet dimensions, String name, String description, List <String> labels = null) where T : IVectorDimensions
        {
            DataTable table = new DataTable();

            table.SetTitle(name);
            table.SetDescription(description);



            Boolean hasLabelID = false;

            T first = _vectors.FirstOrDefault();

            if (first != null)
            {
                if (first is FeatureVectorWithLabelID)
                {
                    hasLabelID = true;
                }
            }


            List <T> vectors = _vectors.ToList();



            table.SetAdditionalInfoEntry("Documents", vectors.Count, "Total count of document vectors");
            table.SetAdditionalInfoEntry("Dimensions", dimensions.Count, "Number of dimensions");

            DataColumn column_rank = table.Add("Nr", "Order of appearance", "N", typeof(Int32), imbSCI.Core.enums.dataPointImportance.normal).SetWidth(10);

            DataColumn        column_token  = table.Add("Name", "Name of the document vector", "Name", typeof(String), imbSCI.Core.enums.dataPointImportance.normal).SetWidth(50);
            List <DataColumn> dimension_col = new List <DataColumn>();


            foreach (var dim in dimensions)
            {
                DataColumn dim_col = null;
                String     prefix  = dimension_col.Count.ToString("D3");
                dim_col = table.Add(dim.name, dim.description, "D" + prefix, typeof(Double), imbSCI.Core.enums.dataPointImportance.important, "F5");
                dimension_col.Add(dim_col);
            }

            DataColumn column_label = null;

            if (hasLabelID)
            {
                column_label = table.Add("Label", "Affiliation to a category", "Label", typeof(String), imbSCI.Core.enums.dataPointImportance.normal).SetWidth(50);
            }

            Int32 r = 1;

            foreach (T docVec in vectors)
            {
                var dr = table.NewRow();

                dr[column_rank]  = r;
                dr[column_token] = docVec.name;
                Int32 di = 0;
                foreach (DataColumn dc in dimension_col)
                {
                    if (di < docVec.dimensions.Length)
                    {
                        Double val = docVec.dimensions[di];
                        dr[dc] = val;
                    }

                    di++;
                }

                if (hasLabelID)
                {
                    Int32  lid     = 0;
                    String lbl_str = "";

                    FeatureVectorWithLabelID docVecWithLabel = docVec as FeatureVectorWithLabelID;

                    if (docVecWithLabel != null)
                    {
                        lid = docVecWithLabel.labelID;
                    }

                    if (labels != null)
                    {
                        if (lid > -1 && lid < labels.Count)
                        {
                            lbl_str = labels[lid];
                        }
                        else
                        {
                            lbl_str = lid.ToString();
                        }
                    }


                    dr[column_label] = lbl_str;
                }



                table.Rows.Add(dr);


                r++;
            }

            return(table);
        }
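
A hedged usage sketch for the extension method above, rendering classification results as a table; the dimensions variable is assumed to describe the feature space:

        // sketch: build a table of test results, with the Label column resolved through the truth table
        DataTable resultTable = context.testResults.MakeTable(
            dimensions,                            // dimensionSpecificationSet for the feature space (assumed)
            "testResults",
            "Feature vectors with predicted label IDs",
            truthTable.labels_without_unknown);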