Example #1
0
        /// <summary>
        /// Runs a single train-and-test pass: splits the feature space into the context's
        /// training and test sets, trains <c>classifier</c> on the training set and stores
        /// one predicted label ID per test vector in <c>context.testResults</c>.
        /// </summary>
        /// <param name="context">Provides the feature space, the space model labels and the training/test sets; receives the test results.</param>
        /// <param name="truthTable">Truth table to use; when <c>null</c>, a new one is created and deployed from the context.</param>
        /// <param name="distributionRule">Separation rule handed to <c>DistributeTrainingAndTestSets</c>.</param>
        /// <param name="log">Log builder that receives progress messages.</param>
        public void PerformClassification(OperationContext context, ExperimentTruthTable truthTable, ClassificationDatasetSeparationEnum distributionRule, ILogBuilder log)
        {
            log.log("Performing classification");

            if (truthTable == null)
            {
                // The caller supplied no table: build one from the label names of the space model.
                truthTable = new ExperimentTruthTable();
                notes.log(":: DEPLOYING IN-FOLD TRUTH TABLE ::");
                truthTable.Deploy(
                    context.featureSpace,
                    context.spaceModel.labels.Select(label => label.name).ToList(),
                    log);
            }

            DistributeTrainingAndTestSets(distributionRule, truthTable, context.featureSpace, log, context.testSet, context.trainingSet);

            if (!context.trainingSet.Any())
            {
                // Only a note is written in this case; neither training nor testing is executed.
                notes.log("TRAINING SET EMPTY ---- APPLYING 1:1 EXPERIMENT SHEME: training and test set are the same");
                return;
            }

            // --- training phase
            notes.log("Training [" + classifier.name + "] with [" + context.trainingSet.Count + "] feature vectors.");
            classifier.DoTraining(context.trainingSet, log);
            log.log("Training [" + classifier.name + "] completed.");

            // --- testing phase
            notes.log("Testing [" + classifier.name + "] with [" + context.testSet.Count + "] feature vectors.");
            context.testResults = new List<FeatureVectorWithLabelID>();

            foreach (FeatureVector testVector in context.testSet.Select(entry => entry.vector))
            {
                // Pair each test vector with the label ID selected by the classifier.
                Int32 predictedLabelId = classifier.DoSelect(testVector, log);
                context.testResults.Add(new FeatureVectorWithLabelID(testVector, predictedLabelId));
            }

            log.log("Testing [" + classifier.name + "] completed.");
        }
Example #2
0
        /// <summary>
        /// Runs a single train-and-test pass: splits the feature space into the context's
        /// training and test sets, trains <c>classifier</c>, stores one predicted label ID per
        /// test vector in <c>context.testResults</c>, and logs diagnostics when the classifier
        /// assigned fewer distinct label IDs than the truth table has labels.
        /// </summary>
        /// <param name="context">Provides the feature space, the space model labels and the training/test sets; receives the test results.</param>
        /// <param name="truthTable">Truth table to use; when <c>null</c>, a new one is created and deployed from the context.</param>
        /// <param name="distributionRule">Separation rule handed to <c>DistributeTrainingAndTestSets</c>.</param>
        /// <param name="log">Log builder that receives progress and warning messages.</param>
        public void PerformClassification(OperationContext context, ExperimentTruthTable truthTable, ClassificationDatasetSeparationEnum distributionRule, ILogBuilder log)
        {
            log.log("Performing classification");

            if (truthTable == null)
            {
                // The caller supplied no table: build one from the label names of the space model.
                truthTable = new ExperimentTruthTable();
                notes.log(":: DEPLOYING IN-FOLD TRUTH TABLE ::");
                log.log(":: DEPLOYING IN-FOLD TRUTH TABLE ::");
                truthTable.Deploy(context.featureSpace, context.spaceModel.labels.Select(x => x.name).ToList(), log);
            }


            DistributeTrainingAndTestSets(distributionRule, truthTable, context.featureSpace, log, context.testSet, context.trainingSet);

            if (!context.trainingSet.Any())
            {
                // Only a note is written in this case; neither training nor testing is executed.
                notes.log("TRAINING SET EMPTY ---- APPLYING 1:1 EXPERIMENT SHEME: training and test set are the same");
            }
            else
            {
                notes.log("Training [" + classifier.name + "] with [" + context.trainingSet.Count + "] feature vectors.");
                classifier.DoTraining(context.trainingSet, log);

                log.log("Training [" + classifier.name + "] completed.");


                notes.log("Testing [" + classifier.name + "] with [" + context.testSet.Count + "] feature vectors.");

                context.testResults = new List<FeatureVectorWithLabelID>();

                var ts = context.testSet.Select(x => x.vector);

                // Set of label IDs the classifier actually produced; a hash set keeps the
                // per-vector membership check constant-time.
                var distinctResults = new HashSet<Int32>();


                foreach (FeatureVector fv in ts)
                {
                    Int32 result = classifier.DoSelect(fv, log);
                    distinctResults.Add(result);

                    FeatureVectorWithLabelID fvl = new FeatureVectorWithLabelID(fv, result);
                    context.testResults.Add(fvl);
                }

                if (distinctResults.Count < truthTable.labels_without_unknown.Count)
                {
                    // Start from all known labels and strike out every label that was assigned at least once.
                    List<String> no_match_labels = truthTable.labels_without_unknown.ToList();
                    foreach (Int32 d in distinctResults)
                    {
                        // A result outside the label index range (e.g. -1, which the dataset split
                        // uses for "no label") has no name to strike out; indexing with it would
                        // throw and abort the run from inside a purely diagnostic block.
                        if (d >= 0 && d < truthTable.labels_without_unknown.Count)
                        {
                            no_match_labels.Remove(truthTable.labels_without_unknown[d]);
                        }
                    }

                    log.log("WARNING --- [" + classifier.name + "] ONLY [" + distinctResults.Count + "] of [" + truthTable.labels_without_unknown.Count + "] were assigned by the classifier");
                    foreach (String l in no_match_labels)
                    {
                        log.log("Class [" + l + "] received no assigment");
                    }

                    // Report vectors with fewer than two distinct values in both sets.
                    // NOTE(review): presumably such near-constant vectors explain the missing
                    // assignments - confirm against the classifier implementation.
                    foreach (var v in context.testSet)
                    {
                        var dist = v.GetDistinctValuesAtVector();
                        if (dist.Count < 2)
                        {
                            log.log("Test vector [" + v.name + "] has [" + dist.Count + "] distinct values at [" + v.dimensions.Length + "] dimensions!");
                        }
                    }

                    foreach (var v in context.trainingSet)
                    {
                        var dist = v.GetDistinctValuesAtVector();
                        if (dist.Count < 2)
                        {
                            log.log("Training vector [" + v.name + "] has [" + dist.Count + "] distinct values at [" + v.dimensions.Length + "] dimensions!");
                        }
                    }
                }

                log.log("Testing [" + classifier.name + "] completed.");
            }
        }
Example #3
0
        //public ExperimentTruthTable ConstructTruthTable(FeatureSpace space, ILogBuilder log)
        //{

        //    ExperimentTruthTable output = new ExperimentTruthTable();

        //    output.Deploy(space, )

        //    List<String> labels = space.labelToDocumentAssociations.GetAllDistinctNames(true);

        //    List<FeatureVectorWithLabelID> dataset = new List<FeatureVectorWithLabelID>();

        //    foreach (FeatureVector vec in space.documents)
        //    {
        //        var associated = space.labelToDocumentAssociations.GetAllLinked(vec);

        //        Int32 lbi = -1;


        //        FeatureVectorWithLabelID fvl = null;

        //        if (associated.Any())
        //        {
        //            lbi = labels.IndexOf(associated.First().name);
        //        }
        //        else
        //        {
        //            lbi = labels.IndexOf(SpaceLabel.UNKNOWN);
        //        }

        //        fvl = new FeatureVectorWithLabelID(vec, lbi);
        //        dataset.Add(fvl);
        //    }

        //    output.Deploy(dataset, log);
        //    return output;

        //}


        /// <summary>
        /// Splits the documents of <paramref name="featureSpace"/> into labeled and unlabeled
        /// vectors and appends them to <paramref name="trainingSet"/> and <paramref name="testSet"/>
        /// according to the flags set in <paramref name="distributionRule"/>.
        /// </summary>
        /// <remarks>
        /// A document counts as labeled when it has at least one linked label and the name of the
        /// first linked label is found in <c>truthTable.labels_without_unknown</c>; otherwise its
        /// label ID is -1 and it is treated as unlabeled.
        /// When there are no unlabeled documents, the labeled vectors are used in their place, so
        /// the flag-based distribution below still fills the caller's sets.
        /// </remarks>
        /// <param name="distributionRule">Flags selecting which group (labeled/unlabeled) goes into which set.</param>
        /// <param name="truthTable">Supplies the label list used to resolve label IDs.</param>
        /// <param name="featureSpace">Source of documents and label-to-document associations.</param>
        /// <param name="log">Log builder that receives progress messages.</param>
        /// <param name="testSet">Existing list the selected test vectors are appended to.</param>
        /// <param name="trainingSet">Existing list the selected training vectors are appended to.</param>
        public void DistributeTrainingAndTestSets(ClassificationDatasetSeparationEnum distributionRule, ExperimentTruthTable truthTable, FeatureSpace featureSpace, ILogBuilder log, List<FeatureVectorWithLabelID> testSet, List<FeatureVectorWithLabelID> trainingSet)
        {
            log.log("Spliting data [" + distributionRule.ToString() + "]");

            // Unlabeled vectors (label ID -1) and labeled vectors, respectively.
            List<FeatureVectorWithLabelID> __testSet = new List<FeatureVectorWithLabelID>();
            List<FeatureVectorWithLabelID> __trainingSet = new List<FeatureVectorWithLabelID>();

            foreach (FeatureVector vec in featureSpace.documents)
            {
                var associated = featureSpace.labelToDocumentAssociations.GetAllLinked(vec);

                // -1 marks "no label"; IndexOf also yields -1 when the label name is not in the truth table.
                Int32 lbi = -1;

                if (associated.Any())
                {
                    lbi = truthTable.labels_without_unknown.IndexOf(associated.First().name);
                }

                FeatureVectorWithLabelID fvl = new FeatureVectorWithLabelID(vec, lbi);

                if (lbi == -1)
                {
                    __testSet.Add(fvl);
                }
                else
                {
                    __trainingSet.Add(fvl);
                }
            }

            if (!__testSet.Any())
            {
                notes.log("TEST SET IS EMPTY ---- APPLYING 1:1 EXPERIMENT SHEME: training and test set are the same");
                __trainingSet.ForEach(x => __testSet.Add(x));
            }

            // The distribution runs unconditionally. Previously it sat in an else-branch, so in the
            // 1:1 fallback above nothing was ever written to the caller's lists and both stayed empty.
            // NOTE(review): the second argument of AddRange belongs to a project extension method;
            // presumably it skips items already present - confirm.
            if (distributionRule.HasFlag(ClassificationDatasetSeparationEnum.TestLabeled))
            {
                testSet.AddRange(__trainingSet, true);
            }

            if (distributionRule.HasFlag(ClassificationDatasetSeparationEnum.TestUnlabeled))
            {
                testSet.AddRange(__testSet, true);
            }

            if (distributionRule.HasFlag(ClassificationDatasetSeparationEnum.TrainingLabeled))
            {
                trainingSet.AddRange(__trainingSet, true);
            }

            if (distributionRule.HasFlag(ClassificationDatasetSeparationEnum.TrainingUnlabeled))
            {
                trainingSet.AddRange(__testSet, true);
            }

            log.log("Training [" + trainingSet.Count + "] - Testing [" + testSet.Count + "]");
        }