public double CalculateMeasure(DataMining.Model.IClassifier classifier, Data.Dataset dataset)
{
    // Root mean squared error of the probability assigned to the true class.
    double MSE = 0;
    for (int i = 0; i < dataset.Size; i++)
    {
        Example example = dataset[i];
        int actual = example.Label;
        Prediction prediction = classifier.Classify(example);
        MSE += Math.Pow(1 - prediction.Probabilities[actual], 2);
    }
    // Average the squared errors first, then take the square root.
    MSE = Math.Sqrt(MSE / dataset.Size);
    return MSE;
}
public double CalculateMeasure(DataMining.Model.IClassifier classifier, Data.Dataset dataset)
{
    // Quadratic (Brier-style) loss, averaged over the dataset.
    double QLF = 0;
    for (int i = 0; i < dataset.Size; i++)
    {
        Example example = dataset[i];
        int actual = example.Label;
        Prediction prediction = classifier.Classify(example);
        // Probability assigned to the true class, not to the predicted class.
        double probability = prediction.Probabilities[actual];
        double value = 0;
        for (int index = 0; index < dataset.Metadata.Target.Length; index++)
        {
            if (index == actual)
            {
                value -= 2 * probability;
            }
            else
            {
                value += Math.Pow(prediction.Probabilities[index], 2);
            }
        }
        // Shift and scale so each per-example term lies in [0, 1].
        QLF += (2 + value) / 3;
    }
    return QLF / dataset.Size;
}
public double CalculateMeasure(DataMining.Model.IClassifier classifier, Data.Dataset dataset)
{
    // Information reward of the predictions relative to the class priors.
    double BIReward = 0;
    double[] classProbabilities = new double[dataset.Metadata.Target.Length];
    for (int i = 0; i < classProbabilities.Length; i++)
    {
        classProbabilities[i] = (double)dataset.Filter(i).Count / dataset.Size;
    }
    for (int i = 0; i < dataset.Size; i++)
    {
        Example example = dataset[i];
        int actual = example.Label;
        Prediction prediction = classifier.Classify(example);
        int predicted = prediction.Label;
        double probability = prediction.Probabilities[predicted];
        if (predicted == actual)
        {
            // Reward confidence that beats the prior of the true class.
            BIReward += Math.Log(probability / classProbabilities[actual]);
        }
        else
        {
            // Penalise confident mistakes relative to the prior.
            BIReward += Math.Log((1 - probability) / (1 - classProbabilities[actual]));
        }
    }
    return (BIReward / dataset.Size) + 0.5;
}
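// A minimal usage sketch for the three probabilistic measures above. The wrapper
// class names (MseMeasure, QuadraticLossMeasure, InformationRewardMeasure) are
// hypothetical; only the CalculateMeasure(IClassifier, Dataset) signature comes
// from the code in this section.
public static void ReportProbabilisticMeasures(DataMining.Model.IClassifier classifier, Data.Dataset testset)
{
    double rmse = new MseMeasure().CalculateMeasure(classifier, testset);                 // lower is better
    double qlf = new QuadraticLossMeasure().CalculateMeasure(classifier, testset);        // in [0, 1], lower is better
    double reward = new InformationRewardMeasure().CalculateMeasure(classifier, testset); // higher is better
    Console.WriteLine("RMSE={0:F4} QLF={1:F4} BIReward={2:F4}", rmse, qlf, reward);
}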
public static void SaveDatasetToArff(Data.Dataset dataset, string filePath)
{
    using (StreamWriter writer = new StreamWriter(filePath))
    {
        writer.WriteLine("@relation " + dataset.Metadata.DatasetName);
        writer.WriteLine();
        foreach (Data.Attribute attribute in dataset.Metadata.Attributes)
        {
            writer.WriteLine("@attribute " + attribute.ToString());
        }
        writer.WriteLine("@attribute " + dataset.Metadata.Target.ToString());
        writer.WriteLine();
        writer.WriteLine("@data");
        foreach (Data.Instance instance in dataset)
        {
            writer.WriteLine(instance.ToString());
        }
        // The using block disposes the writer, which flushes and closes the file.
    }
}
public double CalculateMeasure(DataMining.Model.IClassifier classifier, Data.Dataset dataset)
{
    return this.CalculateMeasure(ConfusionMatrix.ComputeConfusionMatrixes(classifier, dataset));
}
public static ConfusionMatrix[] ComputeConfusionMatrixes(Model.IClassifier classifier, Data.Dataset testset)
{
    // One confusion matrix per class value (one-vs-rest). Initialise every
    // entry up front: 'new ConfusionMatrix[n]' alone leaves null entries when
    // ConfusionMatrix is a reference type.
    ConfusionMatrix[] list = new ConfusionMatrix[testset.Metadata.Target.Values.Length];
    for (int classIndex = 0; classIndex < list.Length; classIndex++)
    {
        list[classIndex] = new ConfusionMatrix();
    }
    for (int i = 0; i < testset.Size; i++)
    {
        Instance instance = testset[i];
        int actual = instance.Label;
        Prediction prediction = classifier.Classify(instance);
        int predicted = prediction.Label;
        double probability = prediction.Probabilities[predicted];
        bool correct = predicted == actual;
        for (int classIndex = 0; classIndex < list.Length; classIndex++)
        {
            if (correct)
            {
                if (classIndex == actual)
                {
                    list[classIndex].TP++;
                    list[classIndex].TP_Prob += probability;
                }
                else
                {
                    list[classIndex].TN++;
                    list[classIndex].TN_Prob += probability;
                }
            }
            else if (classIndex == actual)
            {
                list[classIndex].FN++;
                list[classIndex].FN_Prob += probability;
            }
            else if (classIndex == predicted)
            {
                list[classIndex].FP++;
                list[classIndex].FP_Prob += probability;
            }
            else
            {
                list[classIndex].TN++;
                list[classIndex].TN_Prob += probability;
            }
        }
    }
    return list;
}
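// A hedged sketch showing how per-class metrics could be derived from the
// one-vs-rest matrices produced by ComputeConfusionMatrixes. It relies only on
// the TP/FP/FN counters used above; the method name is hypothetical.
public static void PrintPerClassMetrics(ConfusionMatrix[] matrixes)
{
    for (int classIndex = 0; classIndex < matrixes.Length; classIndex++)
    {
        ConfusionMatrix m = matrixes[classIndex];
        double precision = (m.TP + m.FP) > 0 ? (double)m.TP / (m.TP + m.FP) : 0;
        double recall = (m.TP + m.FN) > 0 ? (double)m.TP / (m.TP + m.FN) : 0;
        double f1 = (precision + recall) > 0 ? 2 * precision * recall / (precision + recall) : 0;
        Console.WriteLine("class {0}: precision={1:F4} recall={2:F4} F1={3:F4}",
            classIndex, precision, recall, f1);
    }
}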
public ClassBasedSimilarityMeasure(Data.Dataset dataset)
{
    this._dataset = dataset;
    this.CalculateBaseSimilarities();
}
private static Data.Example ParseHierarchicalExampleLine(int exampleIndex, string line, Data.Dataset dataset, bool skipFirstAttribute)
{
    string[] parts = line.Split(',');
    List<double> values = new List<double>();
    List<int> label = new List<int>();
    // The last field holds the class labels, separated by ';'.
    string[] labelParts = parts[parts.Length - 1].Split(';');
    foreach (string target in labelParts)
    {
        if (!string.IsNullOrWhiteSpace(target))
        {
            label.Add(dataset.Metadata.Target.GetIndex(target));
        }
    }
    for (int index = 0; index < parts.Length - 1; index++)
    {
        if (dataset.Metadata.Attributes[index] is Data.NominalAttribute)
        {
            Data.NominalAttribute attribute = (Data.NominalAttribute)dataset.Metadata.Attributes[index];
            values.Add(attribute.GetIndex(parts[index]));
        }
        else
        {
            // '?' marks a missing numeric value.
            double value = double.NaN;
            if (parts[index] != "?")
            {
                value = double.Parse(parts[index], System.Globalization.CultureInfo.InvariantCulture);
            }
            values.Add(value);
        }
    }
    return new Data.Example(dataset.Metadata, exampleIndex, values.ToArray(), label);
}
public static Data.Dataset LoadHierarchicalDatasetFromTxt(string filePath, bool skipFirstAttribute)
{
    string datasetName = null;
    Data.Metadata metadata = null;
    Data.Dataset dataset = null;
    List<Data.Attribute> attributes = new List<Data.Attribute>();
    List<Data.Example> examples = new List<Data.Example>();
    List<Data.Node> nodes = new List<Data.Node>();
    int attributeIndex = 0;
    int exampleIndex = 0;
    string mode = "start";
    using (StreamReader reader = new StreamReader(filePath))
    {
        while (!reader.EndOfStream)
        {
            string line = reader.ReadLine();
            if (string.IsNullOrEmpty(line))
            {
                continue;
            }
            if (line.Contains("%"))
            {
                // '%' marks a comment line.
                continue;
            }
            if (line.Contains("@relation"))
            {
                datasetName = line.Substring(line.IndexOf(' ')).Trim();
                datasetName = datasetName.Contains("-weka")
                    ? datasetName.Substring(0, datasetName.IndexOf("-weka"))
                    : datasetName;
            }
            else if (line.Contains("@attribute"))
            {
                Data.Attribute attribute = ParseAttributeLine(line, attributeIndex);
                if (attribute != null)
                {
                    attributeIndex++;
                    attributes.Add(attribute);
                }
            }
            else if (line.Contains("@ontology"))
            {
                mode = "ontology";
            }
            else if (line.Contains("@data"))
            {
                // Build the class hierarchy collected in the ontology section,
                // skipping the root node at index 0.
                List<string> classValues = new List<string>();
                int counter = 0;
                for (int i = 1; i < nodes.Count; i++)
                {
                    Data.Node node = nodes[i];
                    node.ValueIndex = counter;
                    classValues.Add(node.Name);
                    counter++;
                }
                Data.ClassHierarchy classHierarchy = new Data.ClassHierarchy(nodes.ToArray());
                Data.HierarchicalAttribute target = new Data.HierarchicalAttribute("class", attributes.Count, classValues.ToArray(), classHierarchy);
                metadata = new Data.Metadata(datasetName, attributes.ToArray(), target, true);
                dataset = new Data.Dataset(metadata);
                mode = "data";
            }
            else if (mode == "ontology")
            {
                Data.Node node = ParseOntologyLine(line);
                if (!nodes.Exists(n => n.Name == node.Name))
                {
                    nodes.Add(node);
                }
            }
            else
            {
                Data.Example example = ParseHierarchicalExampleLine(exampleIndex, line, dataset, skipFirstAttribute);
                exampleIndex++;
                examples.Add(example);
            }
        }
    }
    dataset.SetExamples(examples.ToArray());
    return dataset;
}
public static Data.Dataset LoadDatasetFromArff(string filePath)
{
    string datasetName = null;
    Data.Metadata metadata = null;
    Data.Dataset dataset = null;
    List<Data.Attribute> attributes = new List<Data.Attribute>();
    List<Data.Example> examples = new List<Data.Example>();
    int attributeIndex = 0;
    int exampleIndex = 0;
    // Tracks lines already seen so duplicate examples are loaded only once.
    HashSet<string> exampleLines = new HashSet<string>();
    using (StreamReader reader = new StreamReader(filePath))
    {
        while (!reader.EndOfStream)
        {
            string line = reader.ReadLine();
            if (string.IsNullOrEmpty(line) || line.Contains("%"))
            {
                continue;
            }
            if (line.Contains("@relation"))
            {
                datasetName = line.Substring(line.IndexOf(' ')).Trim();
                datasetName = datasetName.Contains("-weka")
                    ? datasetName.Substring(0, datasetName.IndexOf("-weka"))
                    : datasetName;
            }
            else if (line.Contains("@attribute"))
            {
                Data.Attribute attribute = ParseAttributeLine(line, attributeIndex);
                if (attribute != null)
                {
                    attributeIndex++;
                    attributes.Add(attribute);
                }
            }
            else if (line.Contains("@data"))
            {
                // The attribute named "Class" (case-insensitive) is the target.
                int targetIndex = attributes.FindIndex(m => string.Compare(m.Name, "Class", true) == 0);
                Data.NominalAttribute target = attributes[targetIndex] as Data.NominalAttribute;
                attributes.RemoveAt(targetIndex);
                metadata = new Data.Metadata(datasetName, attributes.ToArray(), target, false);
                dataset = new Data.Dataset(metadata);
            }
            else
            {
                if (!exampleLines.Add(line))
                {
                    continue;
                }
                Data.Example example = ParseExampleLine(exampleIndex, line, dataset);
                exampleIndex++;
                examples.Add(example);
            }
        }
    }
    dataset.SetExamples(examples.ToArray());
    return dataset;
}
private static Data.Instance ParseInstanceLine(int instanceIndex, string line, Data.Dataset dataset)
{
    string[] parts = line.Split(',');
    List<double> values = new List<double>();
    // The last field holds the class label.
    int label = dataset.Metadata.Target.GetIndex(parts[parts.Length - 1]);
    for (int index = 0; index < parts.Length - 1; index++)
    {
        if (dataset.Metadata.Attributes[index] is Data.NominalAttribute)
        {
            Data.NominalAttribute attribute = (Data.NominalAttribute)dataset.Metadata.Attributes[index];
            values.Add(attribute.GetIndex(parts[index]));
        }
        else
        {
            // '?' marks a missing numeric value.
            double value = double.NaN;
            if (parts[index] != "?")
            {
                value = double.Parse(parts[index], System.Globalization.CultureInfo.InvariantCulture);
            }
            values.Add(value);
        }
    }
    return new Data.Instance(dataset.Metadata, instanceIndex, values.ToArray(), label);
}
public static ConfusionMatrix[] GetConfusionMatrixes(Model.Hierarchical.IHierarchicalClassifier classifier, Data.Dataset testset)
{
    // One confusion matrix per class in the hierarchy (one-vs-rest). Initialise
    // every entry up front: 'new ConfusionMatrix[n]' alone leaves null entries
    // when ConfusionMatrix is a reference type.
    ConfusionMatrix[] list = new ConfusionMatrix[testset.Metadata.Target.Values.Length];
    for (int classIndex = 0; classIndex < list.Length; classIndex++)
    {
        list[classIndex] = new ConfusionMatrix();
    }
    foreach (Data.Example example in testset)
    {
        int[] predicted = classifier.Classify(example);
        int[] actual = example.HierarchicalLabel;
        for (int classIndex = 0; classIndex < list.Length; classIndex++)
        {
            if (predicted.Contains(classIndex))
            {
                if (actual.Contains(classIndex))
                {
                    list[classIndex].TP++;
                }
                else
                {
                    list[classIndex].FP++;
                }
            }
            else if (actual.Contains(classIndex))
            {
                list[classIndex].FN++;
            }
            else
            {
                list[classIndex].TN++;
            }
        }
    }
    return list;
}
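// A sketch of micro-averaged hierarchical precision, recall and F-measure
// (hP, hR, hF) computed from the matrices returned by the hierarchical
// GetConfusionMatrixes above. Only the TP/FP/FN counters come from the code in
// this section; the method name is hypothetical.
public static double HierarchicalFMeasure(ConfusionMatrix[] matrixes)
{
    double tp = 0, fp = 0, fn = 0;
    foreach (ConfusionMatrix m in matrixes)
    {
        tp += m.TP;
        fp += m.FP;
        fn += m.FN;
    }
    double hP = tp / (tp + fp); // micro-averaged precision over all classes
    double hR = tp / (tp + fn); // micro-averaged recall over all classes
    return 2 * hP * hR / (hP + hR);
}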
public static ConfusionMatrix[] GetConfusionMatrixes(Model.IClassifier classifier, Data.Dataset testset)
{
    // One confusion matrix per class value (one-vs-rest). Initialise every
    // entry up front: 'new ConfusionMatrix[n]' alone leaves null entries when
    // ConfusionMatrix is a reference type.
    ConfusionMatrix[] list = new ConfusionMatrix[testset.Metadata.Target.Values.Length];
    for (int classIndex = 0; classIndex < list.Length; classIndex++)
    {
        list[classIndex] = new ConfusionMatrix();
    }
    foreach (Data.Example example in testset)
    {
        int actual = example.Label;
        Prediction prediction = classifier.Classify(example);
        int predicted = prediction.Label;
        double probability = prediction.Probability;
        bool correct = predicted == actual;
        for (int classIndex = 0; classIndex < list.Length; classIndex++)
        {
            if (correct)
            {
                if (classIndex == actual)
                {
                    list[classIndex].TP++;
                    list[classIndex].TP_Prob += probability;
                }
                else
                {
                    list[classIndex].TN++;
                    list[classIndex].TN_Prob += probability;
                }
            }
            else if (classIndex == actual)
            {
                list[classIndex].FN++;
                list[classIndex].FN_Prob += probability;
            }
            else if (classIndex == predicted)
            {
                list[classIndex].FP++;
                list[classIndex].FP_Prob += probability;
            }
            else
            {
                list[classIndex].TN++;
                list[classIndex].TN_Prob += probability;
            }
        }
    }
    return list;
}
private static Data.Example ParseHierarchicalExampleLine(int exampleIndex, string line, Data.Dataset dataset, bool skipFirstAttribute)
{
    string[] parts = line.Split(',');
    List<int> values = new List<int>();
    List<int> label = new List<int>();
    // The last field holds the class labels, separated by ';'.
    string[] labelParts = parts[parts.Length - 1].Split(';');
    foreach (string target in labelParts)
    {
        if (!string.IsNullOrWhiteSpace(target))
        {
            label.Add(dataset.Metadata.Target.GetIndex(target));
        }
    }
    // Columns containing "All" are skipped; the remaining values are shifted
    // back so they line up with the matching attribute index.
    int skips = 0;
    int step = skipFirstAttribute ? 1 : 0;
    for (int index = step; index < parts.Length - 1; index++)
    {
        string value = parts[index];
        if (!value.Contains("All"))
        {
            values.Add(dataset.Metadata.Attributes[index - step - skips].GetIndex(value));
        }
        else
        {
            skips++;
        }
    }
    return new Data.Example(dataset.Metadata, exampleIndex, values.ToArray(), label);
}
public static Data.Dataset LoadDatasetFromArff(string filePath)
{
    string datasetName = null;
    Data.Metadata metadata = null;
    Data.Dataset dataset = null;
    List<Data.Attribute> attributes = new List<Data.Attribute>();
    List<Data.Example> examples = new List<Data.Example>();
    string[] target = null;
    int attributeIndex = 0;
    int exampleIndex = 0;
    // Positions (in the original file) of attributes parsed as nominal.
    List<int> nominalAttributesIndexes = new List<int>();
    int aindex = 0;
    // Tracks lines already seen so duplicate examples are loaded only once.
    HashSet<string> exampleLines = new HashSet<string>();
    using (StreamReader reader = new StreamReader(filePath))
    {
        while (!reader.EndOfStream)
        {
            string line = reader.ReadLine();
            if (string.IsNullOrEmpty(line) || line.Contains("%"))
            {
                continue;
            }
            if (line.Contains("@relation"))
            {
                datasetName = line.Substring(line.IndexOf(' ')).Trim();
                datasetName = datasetName.Contains("-weka")
                    ? datasetName.Substring(0, datasetName.IndexOf("-weka"))
                    : datasetName;
            }
            else if (line.Contains("@attribute"))
            {
                if (line.ToUpper().Contains("CLASS"))
                {
                    target = ParseClassAttributeLine(line);
                }
                else
                {
                    Data.Attribute[] currentAttributes = ParseAttributeLine(line, attributeIndex);
                    attributeIndex += currentAttributes.Length;
                    attributes.AddRange(currentAttributes);
                    // An attribute that expanded into several columns, or whose
                    // name was rewritten, is treated as nominal.
                    if (currentAttributes.Length > 1 || !line.Contains(currentAttributes[0].Name))
                    {
                        nominalAttributesIndexes.Add(aindex);
                    }
                    aindex++;
                }
            }
            else if (line.Contains("@data"))
            {
                metadata = new Data.Metadata(datasetName, attributes.ToArray(), target);
                dataset = new Data.Dataset(metadata);
            }
            else
            {
                if (!exampleLines.Add(line))
                {
                    continue;
                }
                Data.Example example = ParseExampleLine(exampleIndex, line, dataset.Metadata, nominalAttributesIndexes);
                exampleIndex++;
                examples.Add(example);
            }
        }
    }
    dataset.SetExamples(examples.ToArray());
    return dataset;
}
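// A round-trip sketch using the ARFF loader and writer above, assuming both
// methods live in the same utility class and that Metadata exposes the
// DatasetName and Attributes members used earlier. File paths are placeholders.
public static void ConvertDataset()
{
    Data.Dataset dataset = LoadDatasetFromArff("input.arff");
    Console.WriteLine("{0}: {1} examples, {2} attributes",
        dataset.Metadata.DatasetName, dataset.Size, dataset.Metadata.Attributes.Length);
    SaveDatasetToArff(dataset, "output.arff");
}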