コード例 #1
0
        //public void Deploy(FeatureVectorDictionary dataset, ILogBuilder logger)
        //{
        //    Deploy(dataset.GetVectorsWithLabelID(d))
        //}

        /// <summary>
        /// Pairs every document vector of <paramref name="space"/> with the index of its label inside
        /// <paramref name="labels"/> and forwards the resulting list to the dataset-based <c>Deploy</c> overload.
        /// </summary>
        /// <param name="space">Feature space providing the document vectors and the label-to-document associations.</param>
        /// <param name="labels">Label names; the position of a name in this list becomes the label ID of a vector.</param>
        /// <param name="logger">Log builder handed over to the dataset-based overload.</param>
        public void Deploy(FeatureSpace space, List<String> labels, ILogBuilder logger)
        {
            var labeledVectors = new List<FeatureVectorWithLabelID>();

            foreach (FeatureVector document in space.documents)
            {
                var linkedLabels = space.labelToDocumentAssociations.GetAllLinked(document);

                // A document without any linked label is looked up under the UNKNOWN label name;
                // only the first linked label is considered otherwise.
                Int32 labelIndex = linkedLabels.Any()
                    ? labels.IndexOf(linkedLabels.First().name)
                    : labels.IndexOf(SpaceLabel.UNKNOWN);

                labeledVectors.Add(new FeatureVectorWithLabelID(document, labelIndex));
            }

            Deploy(labeledVectors, logger, labels);
        }
コード例 #2
0
        //public ExperimentTruthTable ConstructTruthTable(FeatureSpace space, ILogBuilder log)
        //{

        //    ExperimentTruthTable output = new ExperimentTruthTable();

        //    output.Deploy(space, )

        //    List<String> labels = space.labelToDocumentAssociations.GetAllDistinctNames(true);

        //    List<FeatureVectorWithLabelID> dataset = new List<FeatureVectorWithLabelID>();

        //    foreach (FeatureVector vec in space.documents)
        //    {
        //        var associated = space.labelToDocumentAssociations.GetAllLinked(vec);

        //        Int32 lbi = -1;


        //        FeatureVectorWithLabelID fvl = null;

        //        if (associated.Any())
        //        {
        //            lbi = labels.IndexOf(associated.First().name);
        //        }
        //        else
        //        {
        //            lbi = labels.IndexOf(SpaceLabel.UNKNOWN);
        //        }

        //        fvl = new FeatureVectorWithLabelID(vec, lbi);
        //        dataset.Add(fvl);
        //    }

        //    output.Deploy(dataset, log);
        //    return output;

        //}


        /// <summary>
        /// Splits the document vectors of <paramref name="featureSpace"/> into a labeled and an unlabeled group and
        /// fills <paramref name="trainingSet"/> and <paramref name="testSet"/> from those groups as selected by
        /// <paramref name="distributionRule"/>.
        /// </summary>
        /// <remarks>
        /// A vector is treated as labeled when it has at least one linked label and the name of the first linked label
        /// resolves to an index other than -1 in <c>truthTable.labels_without_unknown</c>; otherwise it is unlabeled.
        /// If no unlabeled vector exists, the rule is not consulted and the labeled vectors are placed in both output lists.
        /// </remarks>
        /// <param name="distributionRule">Flags selecting which group(s) go into the training set and the test set.</param>
        /// <param name="truthTable">Supplies the label list used to resolve label indexes.</param>
        /// <param name="featureSpace">Source of the document vectors and their label associations.</param>
        /// <param name="log">Receives the progress messages.</param>
        /// <param name="testSet">Caller-supplied list to which the test vectors are added.</param>
        /// <param name="trainingSet">Caller-supplied list to which the training vectors are added.</param>
        public void DistributeTrainingAndTestSets(ClassificationDatasetSeparationEnum distributionRule, ExperimentTruthTable truthTable, FeatureSpace featureSpace, ILogBuilder log, List<FeatureVectorWithLabelID> testSet, List<FeatureVectorWithLabelID> trainingSet)
        {
            log.log("Spliting data [" + distributionRule.ToString() + "]");

            List<FeatureVectorWithLabelID> unlabeled = new List<FeatureVectorWithLabelID>();
            List<FeatureVectorWithLabelID> labeled = new List<FeatureVectorWithLabelID>();

            foreach (FeatureVector vec in featureSpace.documents)
            {
                var associated = featureSpace.labelToDocumentAssociations.GetAllLinked(vec);

                // -1 marks a vector without a usable label.
                Int32 lbi = -1;

                if (associated.Any())
                {
                    lbi = truthTable.labels_without_unknown.IndexOf(associated.First().name);
                }

                FeatureVectorWithLabelID fvl = new FeatureVectorWithLabelID(vec, lbi);

                if (lbi == -1)
                {
                    unlabeled.Add(fvl);
                }
                else
                {
                    labeled.Add(fvl);
                }
            }

            // NOTE(review): the two-argument AddRange used below is a project extension; its second
            // argument presumably suppresses duplicates - confirm against its definition.
            if (!unlabeled.Any())
            {
                notes.log("TEST SET IS EMPTY ---- APPLYING 1:1 EXPERIMENT SHEME: training and test set are the same");

                // The caller-supplied lists have to be filled in this branch as well; otherwise the
                // announced 1:1 scheme would hand back two empty sets.
                trainingSet.AddRange(labeled, true);
                testSet.AddRange(labeled, true);
            }
            else
            {
                if (distributionRule.HasFlag(ClassificationDatasetSeparationEnum.TestLabeled))
                {
                    testSet.AddRange(labeled, true);
                }

                if (distributionRule.HasFlag(ClassificationDatasetSeparationEnum.TestUnlabeled))
                {
                    testSet.AddRange(unlabeled, true);
                }

                if (distributionRule.HasFlag(ClassificationDatasetSeparationEnum.TrainingLabeled))
                {
                    trainingSet.AddRange(labeled, true);
                }

                if (distributionRule.HasFlag(ClassificationDatasetSeparationEnum.TrainingUnlabeled))
                {
                    trainingSet.AddRange(unlabeled, true);
                }
            }

            log.log("Training [" + trainingSet.Count + "] - Testing [" + testSet.Count + "]");
        }
コード例 #3
0
        /// <summary>
        /// Builds a <see cref="DataTable"/> with one row per document vector of <paramref name="space"/>:
        /// running number, vector name, one value column per similarity dimension of
        /// <paramref name="constructor"/>, and the name of the first label linked to the vector.
        /// </summary>
        /// <param name="space">Feature space whose documents and label associations are reported.</param>
        /// <param name="constructor">Provides the dimension function set from which the value columns are created.</param>
        /// <param name="name">Title assigned to the table.</param>
        /// <param name="description">Description assigned to the table.</param>
        /// <returns>The populated table.</returns>
        public static DataTable MakeTable(this FeatureSpace space, FeatureVectorConstructor constructor, String name, String description)
        {
            DataTable report = new DataTable();

            report.SetTitle(name);
            report.SetDescription(description);

            report.SetAdditionalInfoEntry("Documents", space.documents.Count, "Total count of document vectors");
            report.SetAdditionalInfoEntry("Dimensions", constructor.dimensionFunctionSet.Count, "Number of dimensions");

            DataColumn orderColumn = report.Add("Nr", "Order of appereance", "N", typeof(Int32), imbSCI.Core.enums.dataPointImportance.normal).SetWidth(10);
            DataColumn nameColumn = report.Add("Name", "Name of the document vector", "Name", typeof(String), imbSCI.Core.enums.dataPointImportance.normal).SetWidth(50);

            // Only dimensions of type FeatureSpaceDimensionSimilarity receive a column; the numeric
            // prefix is the count of columns created so far.
            List<DataColumn> dimensionColumns = new List<DataColumn>();
            foreach (FeatureSpaceDimensionBase dimension in constructor.dimensionFunctionSet)
            {
                if (!(dimension is FeatureSpaceDimensionSimilarity similarity))
                {
                    continue;
                }

                String ordinal = dimensionColumns.Count.ToString("D3");

                DataColumn valueColumn = report.Add(
                    ordinal + "-" + similarity.classVector.name,
                    "Dimension computed as [" + similarity.similarityFunction.GetType().Name + "] between document vector and [" + similarity.classVector.name + "]",
                    "D" + ordinal,
                    typeof(Double),
                    imbSCI.Core.enums.dataPointImportance.important,
                    "F5");

                dimensionColumns.Add(valueColumn);
            }

            DataColumn labelColumn = report.Add("Label", "Affiliation to a category", "Label", typeof(String), imbSCI.Core.enums.dataPointImportance.normal).SetWidth(50);

            Int32 rowNumber = 1;

            foreach (var document in space.documents)
            {
                var row = report.NewRow();

                row[orderColumn] = rowNumber;
                row[nameColumn] = document.name;

                // The i-th created column takes the i-th value of the vector; columns beyond the
                // length of the vector are left unset.
                for (Int32 i = 0; i < dimensionColumns.Count && i < document.dimensions.Length; i++)
                {
                    Double value = document.dimensions[i];
                    row[dimensionColumns[i]] = value;
                }

                // The row is attached before the label cell is written, as in the original sequence.
                report.Rows.Add(row);

                var firstLabel = space.labelToDocumentAssociations.GetAllLinked(document).FirstOrDefault();
                String labelText = firstLabel != null ? firstLabel.name : "";
                row[labelColumn] = labelText;

                rowNumber++;
            }

            return report;
        }