//public void Deploy(FeatureVectorDictionary dataset, ILogBuilder logger)
//{
//    Deploy(dataset.GetVectorsWithLabelID(d))
//}

/// <summary>
/// Pairs every document vector of <paramref name="space"/> with a label ID and forwards the
/// resulting list to the <c>Deploy(dataset, logger, labels)</c> overload.
/// </summary>
/// <param name="space">Feature space whose <c>documents</c> are iterated and whose <c>labelToDocumentAssociations</c> are queried for each document.</param>
/// <param name="labels">Label names; a document's label ID is the index of its label name in this list (<c>IndexOf</c> returns -1 when the name is not in the list).</param>
/// <param name="logger">Logger handed through unchanged to the downstream overload.</param>
public void Deploy(FeatureSpace space, List<String> labels, ILogBuilder logger)
{
    //List<String> labels = space.labelToDocumentAssociations.GetAllDistinctNames(true);
    var labeledVectors = new List<FeatureVectorWithLabelID>();

    foreach (FeatureVector document in space.documents)
    {
        var linkedLabels = space.labelToDocumentAssociations.GetAllLinked(document);

        // Only the first linked label is used; a document with no linked label is
        // looked up under SpaceLabel.UNKNOWN instead.
        Int32 labelIndex = linkedLabels.Any()
            ? labels.IndexOf(linkedLabels.First().name)
            : labels.IndexOf(SpaceLabel.UNKNOWN);

        labeledVectors.Add(new FeatureVectorWithLabelID(document, labelIndex));
    }

    Deploy(labeledVectors, logger, labels);
}
//public ExperimentTruthTable ConstructTruthTable(FeatureSpace space, ILogBuilder log)
//{
//    ExperimentTruthTable output = new ExperimentTruthTable();
//    output.Deploy(space, )
//    List<String> labels = space.labelToDocumentAssociations.GetAllDistinctNames(true);
//    List<FeatureVectorWithLabelID> dataset = new List<FeatureVectorWithLabelID>();
//    foreach (FeatureVector vec in space.documents)
//    {
//        var associated = space.labelToDocumentAssociations.GetAllLinked(vec);
//        Int32 lbi = -1;
//        FeatureVectorWithLabelID fvl = null;
//        if (associated.Any())
//        {
//            lbi = labels.IndexOf(associated.First().name);
//        }
//        else
//        {
//            lbi = labels.IndexOf(SpaceLabel.UNKNOWN);
//        }
//        fvl = new FeatureVectorWithLabelID(vec, lbi);
//        dataset.Add(fvl);
//    }
//    output.Deploy(dataset, log);
//    return output;
//}

/// <summary>
/// Splits the documents of <paramref name="featureSpace"/> into a labeled and an unlabeled group and
/// appends those groups to <paramref name="testSet"/> and <paramref name="trainingSet"/> according to
/// the flags set in <paramref name="distributionRule"/>.
/// </summary>
/// <remarks>
/// A document is labeled when the name of its first linked label is found in
/// <c>truthTable.labels_without_unknown</c>; otherwise its label ID is -1 and it is unlabeled.
/// When no unlabeled document exists, the flags are ignored and the labeled group is appended to
/// both output lists (the "1:1" scheme announced in the log message).
/// </remarks>
/// <param name="distributionRule">Flags selecting which group(s) go to the test set and which to the training set.</param>
/// <param name="truthTable">Supplies <c>labels_without_unknown</c>, the list used to resolve label IDs.</param>
/// <param name="featureSpace">Supplies the documents and their label associations.</param>
/// <param name="log">Receives the start message and the final set sizes.</param>
/// <param name="testSet">Output list; vectors selected for testing are appended to it.</param>
/// <param name="trainingSet">Output list; vectors selected for training are appended to it.</param>
public void DistributeTrainingAndTestSets(ClassificationDatasetSeparationEnum distributionRule, ExperimentTruthTable truthTable, FeatureSpace featureSpace, ILogBuilder log, List<FeatureVectorWithLabelID> testSet, List<FeatureVectorWithLabelID> trainingSet)
{
    log.log("Spliting data [" + distributionRule.ToString() + "]");

    List<FeatureVectorWithLabelID> unlabeledVectors = new List<FeatureVectorWithLabelID>();
    List<FeatureVectorWithLabelID> labeledVectors = new List<FeatureVectorWithLabelID>();

    foreach (FeatureVector vec in featureSpace.documents)
    {
        var associated = featureSpace.labelToDocumentAssociations.GetAllLinked(vec);

        // -1 marks "no known label": either nothing is linked, or the first linked
        // label's name is not in labels_without_unknown (IndexOf returns -1).
        Int32 lbi = -1;
        if (associated.Any())
        {
            lbi = truthTable.labels_without_unknown.IndexOf(associated.First().name);
        }

        FeatureVectorWithLabelID fvl = new FeatureVectorWithLabelID(vec, lbi);
        if (lbi == -1)
        {
            unlabeledVectors.Add(fvl);
        }
        else
        {
            labeledVectors.Add(fvl);
        }
    }

    if (!unlabeledVectors.Any())
    {
        // NOTE(review): `notes` is not declared in this method - presumably a member of the
        // enclosing class; confirm it is intended here rather than the `log` parameter.
        notes.log("TEST SET IS EMPTY ---- APPLYING 1:1 EXPERIMENT SHEME: training and test set are the same");

        // Fix: previously this branch only copied the labeled vectors into a local list and never
        // touched the output lists, so callers received nothing in the 1:1 case. Both outputs now
        // receive the labeled vectors. The `true` argument mirrors the calls in the else-branch
        // (presumably "skip duplicates" - confirm against the AddRange extension).
        testSet.AddRange(labeledVectors, true);
        trainingSet.AddRange(labeledVectors, true);
    }
    else
    {
        if (distributionRule.HasFlag(ClassificationDatasetSeparationEnum.TestLabeled))
        {
            testSet.AddRange(labeledVectors, true);
        }

        if (distributionRule.HasFlag(ClassificationDatasetSeparationEnum.TestUnlabeled))
        {
            testSet.AddRange(unlabeledVectors, true);
        }

        if (distributionRule.HasFlag(ClassificationDatasetSeparationEnum.TrainingLabeled))
        {
            trainingSet.AddRange(labeledVectors, true);
        }

        if (distributionRule.HasFlag(ClassificationDatasetSeparationEnum.TrainingUnlabeled))
        {
            trainingSet.AddRange(unlabeledVectors, true);
        }
    }

    log.log("Training [" + trainingSet.Count + "] - Testing [" + testSet.Count + "]");

    //switch (distributionRule)
    //{
    //    case ClassificationDatasetSeparationEnum.TrainingAll_TestAll:
    //        //
    //        break;
    //    case ClassificationDatasetSeparationEnum.TrainingAll_TestUnlabeled:
    //        break;
    //    case ClassificationDatasetSeparationEnum.TrainingLabeled_TestAll:
    //        trainingSet.ForEach(x => testSet.Add(x.vector));
    //        break;
    //    case ClassificationDatasetSeparationEnum.TrainingLabeled_TestUnlabeled:
    //        // just fine
    //        break;
    //}
}
/// <summary>
/// Builds a <see cref="DataTable"/> with one row per document vector of <paramref name="space"/>:
/// a running number, the vector name, one column per similarity dimension of
/// <paramref name="constructor"/>, and the name of the first label linked to the document.
/// </summary>
/// <param name="space">Feature space supplying <c>documents</c> and <c>labelToDocumentAssociations</c>.</param>
/// <param name="constructor">Supplies <c>dimensionFunctionSet</c>; only entries of type <c>FeatureSpaceDimensionSimilarity</c> get a column.</param>
/// <param name="name">Title set on the table.</param>
/// <param name="description">Description set on the table.</param>
/// <returns>The populated table.</returns>
public static DataTable MakeTable(this FeatureSpace space, FeatureVectorConstructor constructor, String name, String description)
{
    DataTable result = new DataTable();
    result.SetTitle(name);
    result.SetDescription(description);

    result.SetAdditionalInfoEntry("Documents", space.documents.Count, "Total count of document vectors");
    result.SetAdditionalInfoEntry("Dimensions", constructor.dimensionFunctionSet.Count, "Number of dimensions");

    // Column creation order fixes the column order in the table: Nr, Name, dimensions..., Label.
    DataColumn numberColumn = result.Add("Nr", "Order of appereance", "N", typeof(Int32), imbSCI.Core.enums.dataPointImportance.normal).SetWidth(10);
    DataColumn nameColumn = result.Add("Name", "Name of the document vector", "Name", typeof(String), imbSCI.Core.enums.dataPointImportance.normal).SetWidth(50);

    List<DataColumn> dimensionColumns = new List<DataColumn>();
    foreach (FeatureSpaceDimensionBase dim in constructor.dimensionFunctionSet)
    {
        if (!(dim is FeatureSpaceDimensionSimilarity similarity))
        {
            // Non-similarity dimensions get no column.
            continue;
        }

        // Three-digit, zero-padded ordinal of this column among the dimension columns created so far.
        String prefix = dimensionColumns.Count.ToString("D3");
        String className = similarity.classVector.name;
        String functionName = similarity.similarityFunction.GetType().Name;

        DataColumn dimensionColumn = result.Add(
            prefix + "-" + className,
            "Dimension computed as [" + functionName + "] between document vector and [" + similarity.classVector.name + "]",
            "D" + prefix,
            typeof(Double),
            imbSCI.Core.enums.dataPointImportance.important,
            "F5");

        dimensionColumns.Add(dimensionColumn);
    }

    DataColumn labelColumn = result.Add("Label", "Affiliation to a category", "Label", typeof(String), imbSCI.Core.enums.dataPointImportance.normal).SetWidth(50);

    Int32 rowNumber = 1;
    foreach (var docVec in space.documents)
    {
        var row = result.NewRow();
        row[numberColumn] = rowNumber;
        row[nameColumn] = docVec.name;

        // The i-th dimension column receives docVec.dimensions[i]; columns beyond the vector's
        // length are left unset.
        // NOTE(review): this assumes the i-th similarity column corresponds to the i-th vector
        // component - verify for constructors that also contain non-similarity dimensions.
        for (Int32 i = 0; i < dimensionColumns.Count; i++)
        {
            if (i < docVec.dimensions.Length)
            {
                Double value = docVec.dimensions[i];
                row[dimensionColumns[i]] = value;
            }
        }

        // The row is attached before the label cell is filled in, as in the original flow.
        result.Rows.Add(row);

        var firstLabel = space.labelToDocumentAssociations.GetAllLinked(docVec).FirstOrDefault();
        String labelText = (firstLabel != null) ? firstLabel.name : "";
        row[labelColumn] = labelText;

        rowNumber++;
    }

    return result;
}