/// <summary>
/// Ensemble classification by unweighted majority vote: each member classifier casts
/// one vote for its predicted label and the label with the most votes wins.
/// </summary>
/// <param name="classifiersInfo">The ensemble members whose votes are counted.</param>
/// <param name="example">The example to classify.</param>
/// <returns>The label index with the highest vote count, or -1 when the ensemble is empty.</returns>
public int Classify(List<ClassifierInfo> classifiersInfo, Data.Example example)
{
    Dictionary<int, int> votesCount = new Dictionary<int, int>();
    foreach (ClassifierInfo current in classifiersInfo)
    {
        int label = current.Classifier.Classify(example).Label;
        // Single TryGetValue instead of the former Keys.Contains + indexer double lookup.
        int count;
        votesCount.TryGetValue(label, out count);
        votesCount[label] = count + 1;
    }

    int maxVote = -1;
    int resultLabel = -1;
    foreach (KeyValuePair<int, int> entry in votesCount)
    {
        if (entry.Value > maxVote)
        {
            maxVote = entry.Value;
            resultLabel = entry.Key;
        }
    }
    return resultLabel;
}
/// <summary>
/// Minkowski distance of order <c>_order</c> between two examples. Nominal attributes
/// contribute 0 on match and 1 on mismatch (overlap metric); numeric attributes
/// contribute their difference raised to the order.
/// </summary>
/// <param name="example1">First example; its metadata decides each attribute's type.</param>
/// <param name="example2">Second example.</param>
/// <returns>The Minkowski distance (Euclidean when <c>_order</c> is 2).</returns>
public double CalculateDistance(Data.Example example1, Data.Example example2)
{
    double distance = 0;
    for (int attributeIndex = 0; attributeIndex < example1.Values.Length; attributeIndex++)
    {
        if (example1.Metadata.Attributes[attributeIndex] is NominalAttribute)
        {
            // Overlap metric for nominal attributes: 0 if equal, 1 otherwise.
            distance += (example1[attributeIndex] == example2[attributeIndex]) ? 0 : 1;
        }
        else
        {
            distance += Math.Pow(example1[attributeIndex] - example2[attributeIndex], this._order);
        }
    }
    // Final Minkowski root. (Removed a stray ';;', an unused 'attributeCount' local,
    // and dead commented-out similarity code.)
    return Math.Pow(distance, 1.0 / (double)this._order);
}
/// <summary>
/// Ensemble classification by quality-weighted vote: each member adds its Quality
/// score to the tally of its predicted label; the label with the highest total wins.
/// Each member classifies the example from <paramref name="examples"/> whose dataset
/// name appears in the member's Desc.
/// </summary>
/// <param name="classifiersInfo">The ensemble members.</param>
/// <param name="examples">Candidate views of the example, one per source dataset.</param>
/// <returns>The label with the highest accumulated quality, or -1 when the ensemble is empty.</returns>
public int Classify(List<ClassifierInfo> classifiersInfo, List<Data.Example> examples)
{
    Dictionary<int, double> votesCount = new Dictionary<int, double>();
    foreach (ClassifierInfo current in classifiersInfo)
    {
        // NOTE(review): Find returns null when no example's dataset name matches
        // current.Desc, which would throw inside Classify — confirm callers guarantee a match.
        Data.Example example = examples.Find(e => current.Desc.Contains(e.Metadata.DatasetName));
        int label = current.Classifier.Classify(example).Label;
        // Single TryGetValue instead of the former Keys.Contains + indexer double lookup.
        double weight;
        votesCount.TryGetValue(label, out weight);
        votesCount[label] = weight + current.Quality;
    }

    double maxVote = -1;
    int resultLabel = -1;
    foreach (KeyValuePair<int, double> entry in votesCount)
    {
        if (entry.Value > maxVote)
        {
            maxVote = entry.Value;
            resultLabel = entry.Key;
        }
    }
    return resultLabel;
}
/// <summary>
/// Parses one comma-separated data line into an example: the last field is the class
/// value, nominal fields are stored as their value index, numeric fields are parsed
/// as doubles with "?" mapped to NaN.
/// </summary>
/// <param name="exampleIndex">Sequential index assigned to the new example.</param>
/// <param name="line">The raw CSV data line.</param>
/// <param name="dataset">Dataset whose metadata describes the attributes and target.</param>
/// <returns>The parsed example.</returns>
private static Data.Example ParseExampleLine(int exampleIndex, string line, Data.Dataset dataset)
{
    string[] tokens = line.Split(',');
    // Last token is the class value; everything before it is an attribute value.
    int label = dataset.Metadata.Target.GetIndex(tokens[tokens.Length - 1]);

    List<double> parsedValues = new List<double>();
    for (int i = 0; i < tokens.Length - 1; i++)
    {
        Data.NominalAttribute nominal = dataset.Metadata.Attributes[i] as Data.NominalAttribute;
        if (nominal != null)
        {
            // Nominal values are stored as their index within the attribute's value list.
            parsedValues.Add(nominal.GetIndex(tokens[i]));
        }
        else
        {
            // "?" marks a missing numeric value and is stored as NaN.
            parsedValues.Add(tokens[i] == "?" ? double.NaN : double.Parse(tokens[i]));
        }
    }

    return new Data.Example(dataset.Metadata, exampleIndex, parsedValues.ToArray(), label);
}
// Computes the average per-attribute similarity of two examples, reading the
// similarity of each differing value pair from the precomputed, flattened
// _similarities table.
public double CalculateSimilarity(Data.Example example1, Data.Example example2) {
    double similarity = 0;
    // Running offset of the current attribute's region within _similarities.
    int seek = 0;
    for (int attributeIndex = 0; attributeIndex < this._dataset.Metadata.Attributes.Length; attributeIndex++) {
        int value1Index = example1[attributeIndex];
        int value2Index = example2[attributeIndex];
        if (value1Index == value2Index) {
            // Identical values are maximally similar; no table lookup needed.
            similarity += 1;
        } else {
            // Order the pair (smaller index first) — only one triangle of the
            // pairwise matrix is stored.
            if (value1Index > value2Index) {
                int temp = value1Index;
                value1Index = value2Index;
                value2Index = temp;
            }
            // NOTE(review): this arithmetic assumes _similarities packs, per attribute,
            // the strictly-upper-triangular value pairs row by row — confirm against the
            // code that fills _similarities and against GetAttributeValueSeek.
            int index = seek + GetAttributeValueSeek(attributeIndex, value1Index) + value2Index - value1Index - 1;
            similarity += this._similarities[index];
        }
        // Advance to the next attribute's region regardless of match.
        seek += GetAttributeSeek(attributeIndex);
    }
    // Average over all attributes so the result stays in [0, 1] (assuming table
    // entries are in [0, 1] — TODO confirm).
    similarity /= this._dataset.Metadata.Attributes.Length;
    return(similarity);
}
/// <summary>
/// Parses one comma-separated data line of nominal values into an example. The last
/// field is the class value. Tokens containing "All" are placeholder columns with no
/// backing attribute and are dropped.
/// </summary>
/// <param name="exampleIndex">Sequential index assigned to the new example.</param>
/// <param name="line">The raw CSV data line.</param>
/// <param name="dataset">Dataset whose metadata describes the attributes and target.</param>
/// <returns>The parsed example.</returns>
private static Data.Example ParseExampleLine(int exampleIndex, string line, Data.Dataset dataset)
{
    string[] tokens = line.Split(',');
    int label = dataset.Metadata.Target.GetIndex(tokens[tokens.Length - 1]);

    // 'skipped' counts dropped "All" columns so the token position stays aligned
    // with the attribute index.
    List<int> encoded = new List<int>();
    int skipped = 0;
    for (int i = 0; i < tokens.Length - 1; i++)
    {
        if (tokens[i].Contains("All"))
        {
            skipped++;
        }
        else
        {
            encoded.Add(dataset.Metadata.Attributes[i - skipped].GetIndex(tokens[i]));
        }
    }

    return new Data.Example(dataset.Metadata, exampleIndex, encoded.ToArray(), label);
}
/// <summary>
/// Class-weighted Minkowski distance of order <c>_order</c>: each attribute's
/// contribution is scaled by the weight for example2's class and that attribute.
/// Nominal attributes use the weighted overlap metric (0 on match, weight on mismatch).
/// </summary>
/// <param name="example1">First example; its metadata decides each attribute's type.</param>
/// <param name="example2">Second example; its Label selects the weight row.</param>
/// <param name="weights">Per-class, per-attribute weights indexed as [class][attribute].</param>
/// <returns>The weighted Minkowski distance.</returns>
public double CalculateDistance(Data.Example example1, Data.Example example2, double[][] weights)
{
    double distance = 0;
    int classIndex = example2.Label;
    int attributeCount = example1.Values.Length;
    for (int attributeIndex = 0; attributeIndex < attributeCount; attributeIndex++)
    {
        if (example1.Metadata.Attributes[attributeIndex] is NominalAttribute)
        {
            // Weighted overlap metric: 0 on match, the class/attribute weight on mismatch.
            distance += (example1[attributeIndex] == example2[attributeIndex]) ? 0 : weights[classIndex][attributeIndex];
        }
        else
        {
            distance += Math.Pow(example1[attributeIndex] - example2[attributeIndex], this._order) * weights[classIndex][attributeIndex];
        }
    }
    // Final Minkowski root. (Removed a stray ';;' and dead commented-out similarity code.)
    return Math.Pow(distance, 1.0 / (double)this._order);
}
/// <summary>
/// Top-down hierarchical ensemble classification: starting from the root's children,
/// a node's class index is predicted when its ensemble votes positive (label 0), and
/// only then are its children considered.
/// </summary>
/// <param name="example">The example to classify.</param>
/// <returns>The distinct value indices of all accepted hierarchy nodes.</returns>
public override int[] Classify(Data.Example example)
{
    List<int> predicted = new List<int>();
    Queue<Node> frontier = new Queue<Node>();
    foreach (string childName in this._hierarchy.Root.Children)
    {
        frontier.Enqueue(this._hierarchy[childName]);
    }

    while (frontier.Count > 0)
    {
        Node node = frontier.Dequeue();
        List<ClassifierInfo> ensemble = this[node.Name];
        if (ensemble == null)
        {
            continue; // no classifiers trained for this node
        }
        // Label 0 means the node's class is predicted as present.
        if (_ensembleStrategy.Classify(ensemble, example) != 0)
        {
            continue;
        }
        predicted.Add(node.ValueIndex);
        if (node.Children != null)
        {
            foreach (string childName in node.Children)
            {
                frontier.Enqueue(this._hierarchy[childName]);
            }
        }
    }

    return predicted.Distinct().ToArray();
}
/// <summary>
/// k-nearest-neighbour classification: computes the class-weighted distance from
/// <paramref name="example"/> to every stored example, takes the k closest, and votes
/// (optionally inverse-distance weighted). Returns the winning label plus the
/// normalized per-class vote distribution.
/// </summary>
/// <param name="example">The example to classify.</param>
/// <returns>A prediction with the majority label and the vote distribution.</returns>
public override Model.Prediction Classify(Data.Example example)
{
    double[] classCounts = new double[this._database.Metadata.Target.Values.Length];
    double[] distances = new double[this._database.Size];
    int[] exampleIndexes = new int[this._database.Size];
    for (int exampleIndex = 0; exampleIndex < this._database.Size; exampleIndex++)
    {
        exampleIndexes[exampleIndex] = exampleIndex;
        if (this._database[exampleIndex] == example)
        {
            // BUG FIX: the query example itself used to keep the default distance 0,
            // which made it sort first and occupy one of the k slots. Push it to the end.
            distances[exampleIndex] = double.PositiveInfinity;
            continue;
        }
        distances[exampleIndex] = this._distanceMeasure.CalculateDistance(example, this._database[exampleIndex], this._classBasedWeights);
    }
    // Co-sort the index array by ascending distance: the first entries are the neighbours.
    Array.Sort(distances, exampleIndexes);

    int max = 0;
    // Never read past the database when k exceeds its size.
    int neighbours = Math.Min(this._k, this._database.Size);
    for (int counter = 0; counter < neighbours; counter++)
    {
        int exampleIndex = exampleIndexes[counter];
        int predicted = this._database[exampleIndex].Label;
        if (this._useWeightedVote)
        {
            // BUG FIX: was (1.0 / 1.0 + d), which evaluates to 1 + d and gave FARTHER
            // neighbours MORE weight. Intended inverse-distance weighting 1 / (1 + d).
            classCounts[predicted] += 1.0 / (1.0 + distances[exampleIndex]);
        }
        else
        {
            classCounts[predicted] += 1;
        }
        if (classCounts[predicted] > classCounts[max])
        {
            max = predicted;
        }
    }

    // Normalize votes into a probability-like distribution; guard the degenerate
    // zero-sum case so we never divide 0 by 0 and fill the vector with NaN.
    double sum = classCounts.Sum();
    if (sum > 0)
    {
        for (int i = 0; i < classCounts.Length; i++)
        {
            classCounts[i] /= sum;
        }
    }
    return new Prediction(max, classCounts);
}
/// <summary>
/// Simple matching similarity: the fraction of attributes on which the two examples
/// carry identical values.
/// </summary>
/// <param name="example1">First example.</param>
/// <param name="example2">Second example.</param>
/// <returns>A value in [0, 1]; 1 means all attribute values match.</returns>
public double CalculateSimilarity(Data.Example example1, Data.Example example2)
{
    int matches = 0;
    for (int i = 0; i < example1.Values.Length; i++)
    {
        if (example1[i] == example2[i])
        {
            matches++;
        }
    }
    return (double)matches / example1.Metadata.Attributes.Length;
}
/// <summary>
/// Similarity-threshold classification: every stored example whose inverse-distance
/// similarity to <paramref name="example"/> exceeds SimilarityThreshold votes for its
/// class with that similarity. Returns the class with the highest accumulated
/// similarity and the normalized similarity distribution.
/// </summary>
/// <param name="example">The example to classify.</param>
/// <returns>A prediction with the best class and the similarity distribution.</returns>
public override Model.Prediction Classify(Data.Example example)
{
    double[] classSimilarity = new double[this._database.Metadata.Target.Values.Length];
    for (int exampleIndex = 0; exampleIndex < this._database.Size; exampleIndex++)
    {
        if (this._database[exampleIndex] == example)
        {
            continue; // never let the query example vote for itself
        }
        Example neighbour = this._database[exampleIndex];
        double distance = this._distanceMeasure.CalculateDistance(example, neighbour, this._classBasedWeights);
        // Identical examples (distance 0) get infinite similarity and always pass the threshold.
        double similarity = double.PositiveInfinity;
        if (distance > 0)
        {
            similarity = 1 / distance;
        }
        if (similarity > this.SimilarityThreshold)
        {
            classSimilarity[neighbour.Label] += similarity;
        }
    }

    int max = 0;
    for (int i = 0; i < classSimilarity.Length; i++)
    {
        if (classSimilarity[i] > classSimilarity[max])
        {
            max = i;
        }
    }

    // BUG FIX: when no neighbour passed the threshold the sum was 0 and the division
    // turned every entry into NaN; skip normalization in that degenerate case.
    // (Also removed the unused 'classCount' tally and dead commented-out averaging code.)
    double sum = classSimilarity.Sum();
    if (sum > 0)
    {
        for (int i = 0; i < classSimilarity.Length; i++)
        {
            classSimilarity[i] /= sum;
        }
    }
    return new Prediction(max, classSimilarity);
}
/// <summary>
/// Best-classifier strategy: delegates the decision to the ensemble member with the
/// highest Quality score (ties keep the earliest member).
/// </summary>
/// <param name="classifiersInfo">The ensemble members; must be non-empty.</param>
/// <param name="example">The example to classify.</param>
/// <returns>The label predicted by the highest-quality classifier.</returns>
public int Classify(List<ClassifierInfo> classifiersInfo, Data.Example example)
{
    ClassifierInfo best = classifiersInfo[0];
    for (int i = 1; i < classifiersInfo.Count; i++)
    {
        if (classifiersInfo[i].Quality > best.Quality)
        {
            best = classifiersInfo[i];
        }
    }
    return best.Classifier.Classify(example).Label;
}
/// <summary>
/// Best-classifier strategy over per-dataset example views: picks the ensemble member
/// with the highest Quality score (ties keep the earliest member) and classifies the
/// example whose dataset name appears in that member's Desc.
/// </summary>
/// <param name="classifiersInfo">The ensemble members; must be non-empty.</param>
/// <param name="examples">Candidate views of the example, one per source dataset.</param>
/// <returns>The label predicted by the highest-quality classifier.</returns>
public int Classify(List<ClassifierInfo> classifiersInfo, List<Data.Example> examples)
{
    ClassifierInfo best = classifiersInfo[0];
    for (int i = 1; i < classifiersInfo.Count; i++)
    {
        if (classifiersInfo[i].Quality > best.Quality)
        {
            best = classifiersInfo[i];
        }
    }
    // Select the example view that belongs to the winning classifier's dataset.
    Data.Example bestExample = examples.Find(e => best.Desc.Contains(e.Metadata.DatasetName));
    return best.Classifier.Classify(bestExample).Label;
}
/// <summary>
/// Top-down hierarchical stacking classification: at each candidate node the base
/// classifiers' labels form a meta-example that the node's meta-classifier judges;
/// a positive verdict (label 0) accepts the node and expands its children.
/// </summary>
/// <param name="example">The example to classify.</param>
/// <returns>The distinct value indices of all accepted hierarchy nodes.</returns>
public override int[] Classify(Data.Example example)
{
    List<int> predicted = new List<int>();
    Queue<Node> candidates = new Queue<Node>();
    foreach (string child in this._hierarchy.Root.Children)
    {
        candidates.Enqueue(this._hierarchy[child]);
    }
    while (candidates.Count != 0)
    {
        Node current = candidates.Dequeue();
        List<Tuple<string, IClassifier>> classifiers = this._modelMapping[current.Name];
        int[] values = new int[classifiers.Count];
        int index = 0;
        foreach (Tuple<string, IClassifier> tuple in classifiers)
        {
            IClassifier classifier = tuple.Item2;
            int localLabel = classifier.Classify(example.Clone() as Example);
            values[index] = localLabel;
            // BUG FIX: 'index' was never incremented, so every base prediction
            // overwrote values[0] and the remaining meta-features stayed 0.
            index++;
        }
        IClassifier metaClassifier = this._metaModel[current.Name];
        // The base labels become the features of the meta-example (no real label: -1).
        Example metaExample = new Example(new Dataset(metaClassifier.MetaData), 0, values, -1);
        int label = metaClassifier.Classify(metaExample);
        if (label == 0)
        {
            predicted.Add(current.ValueIndex);
            if (current.Children != null)
            {
                foreach (string child in current.Children)
                {
                    candidates.Enqueue(this._hierarchy[child]);
                }
            }
        }
    }
    return predicted.Distinct().ToArray();
}
/// <summary>
/// Stacking strategy: the last entry of <paramref name="classifiersInfo"/> is the
/// meta-classifier; the preceding base classifiers' labels are assembled into a
/// meta-example whose classification is the final prediction.
/// </summary>
/// <param name="classifiersInfo">Base classifiers followed by the meta-classifier.</param>
/// <param name="example">The example to classify.</param>
/// <returns>The meta-classifier's predicted label.</returns>
public int Classify(List<ClassifierInfo> classifiersInfo, Data.Example example)
{
    IClassifier metaClassifier = classifiersInfo.Last().Classifier;
    int baseCount = classifiersInfo.Count - 1;
    int[] baseLabels = new int[baseCount];
    for (int i = 0; i < baseCount; i++)
    {
        baseLabels[i] = classifiersInfo[i].Classifier.Classify(example).Label;
    }
    // The base labels become the features of the meta-example (no real label: -1).
    Data.Example metaExample = new Data.Example(metaClassifier.Metadata, 0, baseLabels, -1);
    return metaClassifier.Classify(metaExample).Label;
}
/// <summary>
/// Stacking strategy over per-dataset example views: the last entry of
/// <paramref name="classifiersInfo"/> is the meta-classifier; each base classifier
/// classifies the example whose dataset name appears in its Desc, and the collected
/// labels form the meta-example for the final prediction.
/// </summary>
/// <param name="classifiersInfo">Base classifiers followed by the meta-classifier.</param>
/// <param name="examples">Candidate views of the example, one per source dataset.</param>
/// <returns>The meta-classifier's predicted label.</returns>
public int Classify(List<ClassifierInfo> classifiersInfo, List<Data.Example> examples)
{
    IClassifier metaClassifier = classifiersInfo.Last().Classifier;
    List<int> predictionValues = new List<int>();
    for (int i = 0; i < classifiersInfo.Count - 1; i++)
    {
        ClassifierInfo info = classifiersInfo[i];
        Data.Example example = examples.Find(e => info.Desc.Contains(e.Metadata.DatasetName));
        int prediction = info.Classifier.Classify(example).Label;
        predictionValues.Add(prediction);
    }
    // CONSISTENCY FIX: the single-example overload builds the meta-example with the
    // meta-classifier's metadata and example index 0; this overload passed null
    // metadata and -1, risking a NullReferenceException in any classifier that
    // inspects Example.Metadata.
    Data.Example metaExample = new Data.Example(metaClassifier.Metadata, 0, predictionValues.ToArray(), -1);
    return metaClassifier.Classify(metaExample).Label;
}
/// <summary>
/// Top-down hierarchical classification producing a 0/1 probability vector: a class
/// value's probability is 1 when its node's classifier predicts label 0 along a path
/// from the root. The returned prediction carries no single label (-1).
/// </summary>
/// <param name="example">The example to classify.</param>
/// <returns>A prediction whose probabilities mark the accepted class indices with 1.</returns>
public override Prediction Classify(Data.Example example)
{
    List<int> accepted = new List<int>();
    Queue<Node> frontier = new Queue<Node>();
    foreach (string childName in this._hierarchy.Root.Children)
    {
        frontier.Enqueue(this._hierarchy[childName]);
    }
    while (frontier.Count > 0)
    {
        Node node = frontier.Dequeue();
        // Label 0 means the node's class is predicted as present.
        Prediction nodePrediction = this[node.Name].Classify(example);
        if (nodePrediction.Label == 0)
        {
            accepted.Add(node.ValueIndex);
            if (node.Children != null)
            {
                foreach (string childName in node.Children)
                {
                    frontier.Enqueue(this._hierarchy[childName]);
                }
            }
        }
    }

    double[] probabilities = new double[example.Metadata.Target.Values.Length];
    foreach (int valueIndex in accepted)
    {
        probabilities[valueIndex] = 1;
    }
    return new Prediction(-1, probabilities);
}
// Parses one hierarchical example line: comma-separated nominal values followed by a
// ';'-separated multi-label class field. Tokens containing "All" are placeholder
// columns with no backing attribute and are skipped.
private static Data.Example ParseHierarchicalExampleLine(int exampleIndex, string line, Data.Dataset dataset, bool skipFirstAttribute) {
    string[] parts = line.Split(',');
    List <int> values = new List <int>();
    List <int> label = new List <int>();
    // The last field may list several target values separated by ';'.
    string[] labelParts = parts[parts.Length - 1].Split(';');
    foreach (string target in labelParts) {
        if (!string.IsNullOrWhiteSpace(target)) {
            label.Add(dataset.Metadata.Target.GetIndex(target));
        }
    }
    // 'skips' counts dropped "All" columns and 'step' accounts for an optional skipped
    // first column, so 'index - step - skips' addresses the matching attribute.
    int skips = 0;
    int step = skipFirstAttribute ? 1 : 0;
    int index = skipFirstAttribute ? 1 : 0;
    for (; index < parts.Length - 1; index++) {
        string value = parts[index];
        if (!value.Contains("All")) {
            values.Add(dataset.Metadata.Attributes[index - step - skips].GetIndex(value));
        } else {
            skips++;
        }
    }
    Data.Example example = new Data.Example(dataset.Metadata, exampleIndex, values.ToArray(), label);
    return(example);
}
/// <summary>
/// Parses one hierarchical example line with mixed attribute types: comma-separated
/// values followed by a ';'-separated multi-label class field. Nominal values are
/// stored as their index; numeric values are parsed as doubles with "?" mapped to NaN.
/// </summary>
/// <param name="exampleIndex">Sequential index assigned to the new example.</param>
/// <param name="line">The raw data line.</param>
/// <param name="dataset">Dataset whose metadata describes the attributes and target.</param>
/// <param name="skipFirstAttribute">Unused in this overload; kept for signature parity
/// with the nominal-only parser.</param>
/// <returns>The parsed multi-label example.</returns>
private static Data.Example ParseHierarchicalExampleLine(int exampleIndex, string line, Data.Dataset dataset, bool skipFirstAttribute)
{
    string[] tokens = line.Split(',');

    // The last field may list several target values separated by ';'.
    List<int> labels = new List<int>();
    foreach (string target in tokens[tokens.Length - 1].Split(';'))
    {
        if (!string.IsNullOrWhiteSpace(target))
        {
            labels.Add(dataset.Metadata.Target.GetIndex(target));
        }
    }

    List<double> parsedValues = new List<double>();
    for (int i = 0; i < tokens.Length - 1; i++)
    {
        Data.NominalAttribute nominal = dataset.Metadata.Attributes[i] as Data.NominalAttribute;
        if (nominal != null)
        {
            parsedValues.Add(nominal.GetIndex(tokens[i]));
        }
        else
        {
            // "?" marks a missing numeric value and is stored as NaN.
            parsedValues.Add(tokens[i] == "?" ? double.NaN : double.Parse(tokens[i]));
        }
    }

    return new Data.Example(dataset.Metadata, exampleIndex, parsedValues.ToArray(), labels);
}
/// <summary>
/// Classifies the given example; implemented by concrete single-label classifiers.
/// </summary>
/// <param name="example">The example to classify.</param>
/// <returns>The prediction (label plus per-class score distribution).</returns>
public abstract Prediction Classify(Data.Example example);
// Parses one comma-separated example line against metadata whose nominal source
// columns have been one-hot expanded into "Name::Value" attributes
// (nominalAttributesIndexes lists the raw column positions that were nominal).
private static Data.Example ParseExampleLine(int exampleIndex, string line, Data.Metadata metadata, List <int> nominalAttributesIndexes) {
    string [] parts = line.Split(',');
    List <double> values = new List <double>();
    // Linear scan of the target values to find the label index of the last field.
    // NOTE(review): if the class value is not found, 'label' ends up equal to
    // metadata.Target.Length — confirm callers guarantee a valid class value.
    int label = 0;
    for (; label < metadata.Target.Length; label++) {
        if (metadata.Target[label] == parts[parts.Length - 1]) {
            break;
        }
    }
    // 'offset' walks the expanded attribute list, which can be longer than the raw
    // column list because each nominal column contributes one attribute per value.
    int offset = 0;
    for (int index = 0; index < parts.Length - 1; index++) {
        string stringValue = parts[index];
        if (!nominalAttributesIndexes.Contains(index)) {
            // Numeric column: "?" marks a missing value and is stored as NaN.
            double value = double.NaN;
            if (stringValue != "?") {
                value = double.Parse(stringValue);
            }
            values.Add(value);
            offset++;
        } else {
            // Nominal column: emit a 1 for the matching "Name::Value" attribute and a
            // 0 for every other attribute expanded from the same source column.
            Data.Attribute attribute = metadata.Attributes[offset];
            string attributeName = attribute.Name.Substring(0, attribute.Name.IndexOf("::"));
            while (true) {
                attribute = metadata.Attributes[offset];
                if (!attribute.Name.Contains("::")) {
                    // Reached a non-expanded (numeric) attribute.
                    break;
                }
                string currentAttributeName = attribute.Name.Substring(0, attribute.Name.IndexOf("::"));
                if (currentAttributeName != attributeName) {
                    // Reached the expansion of the next source column.
                    break;
                }
                string valueName = attribute.Name.Substring(attribute.Name.IndexOf("::") + 2);
                if (valueName == stringValue) {
                    values.Add(1);
                } else {
                    values.Add(0);
                }
                offset++;
                if (offset == metadata.Attributes.Length) {
                    break;
                }
            }
        }
    }
    Data.Example example = new Data.Example(metadata, exampleIndex, values.ToArray(), label);
    return(example);
}
/// <summary>
/// Loads a dataset from an ARFF file: reads the relation name, the attribute
/// declarations and the data section, treats the attribute named "Class"
/// (case-insensitive) as the target, and skips duplicate example lines.
/// </summary>
/// <param name="filePath">Path to the ARFF file.</param>
/// <returns>The populated dataset.</returns>
public static Data.Dataset LoadDatasetFromArff(string filePath)
{
    string datasetName = null;
    Data.Dataset dataset = null;
    List<Data.Attribute> attributes = new List<Data.Attribute>();
    List<Data.Example> examples = new List<Data.Example>();
    int attributeIndex = 0;
    int exampleIndex = 0;
    // HashSet gives O(1) duplicate detection instead of the former O(n) List.Contains scan.
    HashSet<string> exampleLines = new HashSet<string>();
    // FIX: the StreamReader was never disposed; 'using' releases the file handle even on exceptions.
    using (StreamReader reader = new StreamReader(filePath))
    {
        while (!reader.EndOfStream)
        {
            string line = reader.ReadLine();
            if (string.IsNullOrEmpty(line))
            {
                continue;
            }
            if (line.Contains("%"))
            {
                continue; // ARFF comment line
            }
            if (line.Contains("@relation"))
            {
                datasetName = line.Substring(line.IndexOf(' ')).Trim();
                // Strip the "-weka..." suffix that Weka appends to relation names.
                datasetName = datasetName.Contains("-weka") ? datasetName.Substring(0, datasetName.IndexOf("-weka")) : datasetName;
            }
            else if (line.Contains("@attribute"))
            {
                Data.Attribute attribute = ParseAttributeLine(line, attributeIndex);
                if (attribute != null)
                {
                    attributeIndex++;
                    attributes.Add(attribute);
                }
            }
            else if (line.Contains("@data"))
            {
                // The attribute named "Class" becomes the target and leaves the feature list.
                int targetIndex = attributes.FindIndex(m => string.Compare(m.Name, "Class", true) == 0);
                Data.NominalAttribute target = attributes[targetIndex] as Data.NominalAttribute;
                attributes.RemoveAt(target.Index);
                Data.Metadata metadata = new Data.Metadata(datasetName, attributes.ToArray(), target, false);
                dataset = new Data.Dataset(metadata);
            }
            else
            {
                // HashSet.Add returns false for a line we have already seen — skip duplicates.
                if (!exampleLines.Add(line))
                {
                    continue;
                }
                Data.Example example = ParseExampleLine(exampleIndex, line, dataset);
                exampleIndex++;
                examples.Add(example);
            }
        }
    }
    dataset.SetExamples(examples.ToArray());
    return dataset;
}
/// <summary>
/// Loads a hierarchical dataset from a text file with @relation, @attribute,
/// @ontology and @data sections. Ontology lines build the class hierarchy whose
/// nodes (all except the root at index 0) become the hierarchical target's values.
/// </summary>
/// <param name="filePath">Path to the dataset file.</param>
/// <param name="skipfirstAttribute">Forwarded to the example parser; when true the
/// first column of each data line is ignored.</param>
/// <returns>The populated hierarchical dataset.</returns>
public static Data.Dataset LoadHierarchicalDatasetFromTxt(string filePath, bool skipfirstAttribute)
{
    string datasetName = null;
    Data.Dataset dataset = null;
    List<Data.Attribute> attributes = new List<Data.Attribute>();
    List<Data.Example> examples = new List<Data.Example>();
    List<Data.Node> nodes = new List<Data.Node>();
    int attributeIndex = 0;
    int exampleIndex = 0;
    // Parser state: "start" (header), "ontolog" (inside @ontology), "data" (after @data).
    string mode = "start";
    // FIX: the StreamReader was never disposed; 'using' releases the file handle even on exceptions.
    using (StreamReader reader = new StreamReader(filePath))
    {
        while (!reader.EndOfStream)
        {
            string line = reader.ReadLine();
            if (string.IsNullOrEmpty(line))
            {
                continue;
            }
            if (line.Contains("%"))
            {
                continue; // comment line
            }
            if (line.Contains("@relation"))
            {
                datasetName = line.Substring(line.IndexOf(' ')).Trim();
                // Strip the "-weka..." suffix that Weka appends to relation names.
                datasetName = datasetName.Contains("-weka") ? datasetName.Substring(0, datasetName.IndexOf("-weka")) : datasetName;
            }
            else if (line.Contains("@attribute"))
            {
                Data.Attribute attribute = ParseAttributeLine(line, attributeIndex);
                if (attribute != null)
                {
                    attributeIndex++;
                    attributes.Add(attribute);
                }
            }
            else if (line.Contains("@ontology"))
            {
                mode = "ontolog";
            }
            else if (line.Contains("@data"))
            {
                // Every hierarchy node except the root (index 0) becomes a target value.
                List<string> classValues = new List<string>();
                int counter = 0;
                for (int i = 1; i < nodes.Count; i++)
                {
                    Data.Node node = nodes[i];
                    node.ValueIndex = counter;
                    classValues.Add(node.Name);
                    counter++;
                }
                Data.ClassHierarchy classHierarchy = new Data.ClassHierarchy(nodes.ToArray());
                Data.HierarchicalAttribute target = new Data.HierarchicalAttribute("class", attributes.Count, classValues.ToArray(), classHierarchy);
                Data.Metadata metadata = new Data.Metadata(datasetName, attributes.ToArray(), target, true);
                dataset = new Data.Dataset(metadata);
                mode = "data";
            }
            else if (mode == "ontolog")
            {
                // Ontology section: collect each hierarchy node once, by name.
                Data.Node node = ParseOntologyLine(line);
                if (!nodes.Exists(n => n.Name == node.Name))
                {
                    nodes.Add(node);
                }
            }
            else
            {
                Data.Example example = ParseHierarchicalExampleLine(exampleIndex, line, dataset, skipfirstAttribute);
                exampleIndex++;
                examples.Add(example);
            }
        }
    }
    dataset.SetExamples(examples.ToArray());
    return dataset;
}
/// <summary>
/// Classifies the given example; implemented by concrete hierarchical (multi-label)
/// classifiers.
/// </summary>
/// <param name="example">The example to classify.</param>
/// <returns>The indices of the predicted class values.</returns>
public abstract int[] Classify(Data.Example example);
/// <summary>
/// Loads an ARFF file into a dataset whose nominal attributes are one-hot expanded
/// ("Name::Value" columns). The attribute declared as CLASS becomes the target; the
/// raw positions of nominal columns are recorded for the example parser.
/// </summary>
/// <param name="filePath">Path to the ARFF file.</param>
/// <returns>The populated dataset.</returns>
public static Data.Dataset LoadDatasetFromArff(string filePath)
{
    string datasetName = null;
    Data.Dataset dataset = null;
    List<Data.Attribute> attributes = new List<Data.Attribute>();
    List<Data.Example> examples = new List<Data.Example>();
    string[] target = null;
    int attributeIndex = 0;
    int exampleIndex = 0;
    // Raw (pre-expansion) indexes of nominal columns, needed by ParseExampleLine.
    List<int> nominalAttributesIndexes = new List<int>();
    int aindex = 0;
    // HashSet gives O(1) duplicate detection instead of the former O(n) List.Contains scan.
    HashSet<string> exampleLines = new HashSet<string>();
    // FIX: the StreamReader was never disposed; 'using' releases the file handle even on exceptions.
    using (StreamReader reader = new StreamReader(filePath))
    {
        while (!reader.EndOfStream)
        {
            string line = reader.ReadLine();
            if (string.IsNullOrEmpty(line))
            {
                continue;
            }
            if (line.Contains("%"))
            {
                continue; // ARFF comment line
            }
            if (line.Contains("@relation"))
            {
                datasetName = line.Substring(line.IndexOf(' ')).Trim();
                // Strip the "-weka..." suffix that Weka appends to relation names.
                datasetName = datasetName.Contains("-weka") ? datasetName.Substring(0, datasetName.IndexOf("-weka")) : datasetName;
            }
            else if (line.Contains("@attribute"))
            {
                if (line.ToUpper().Contains("CLASS"))
                {
                    target = ParseClassAttributeLine(line);
                }
                else
                {
                    Data.Attribute[] currentAttributes = ParseAttributeLine(line, attributeIndex);
                    attributeIndex += currentAttributes.Length;
                    attributes.AddRange(currentAttributes);
                    // A raw column is nominal when it expanded into several attributes, or
                    // when its single attribute's name no longer matches the declaration.
                    if (currentAttributes.Length > 1 || !line.Contains(currentAttributes[0].Name))
                    {
                        nominalAttributesIndexes.Add(aindex);
                    }
                    aindex++;
                }
            }
            else if (line.Contains("@data"))
            {
                Data.Metadata metadata = new Data.Metadata(datasetName, attributes.ToArray(), target);
                dataset = new Data.Dataset(metadata);
            }
            else
            {
                // HashSet.Add returns false for a line we have already seen — skip duplicates.
                if (!exampleLines.Add(line))
                {
                    continue;
                }
                Data.Example example = ParseExampleLine(exampleIndex, line, dataset.Metadata, nominalAttributesIndexes);
                exampleIndex++;
                examples.Add(example);
            }
        }
    }
    dataset.SetExamples(examples.ToArray());
    return dataset;
}