/// <summary>
/// Parses one ARFF <c>@attribute</c> declaration into an attribute object.
/// </summary>
/// <param name="line">The declaration, e.g. <c>@attribute outlook {sunny, overcast, rainy}</c>.</param>
/// <param name="attributeIndex">Index handed to the constructor of the created attribute.</param>
/// <returns>
/// A <see cref="Data.NumericAttribute"/> when the declared type is <c>numeric</c>, <c>real</c> or
/// <c>integer</c> (case-insensitive); otherwise a <see cref="Data.NominalAttribute"/>.
/// </returns>
/// <exception cref="System.FormatException">The declaration has no attribute name or no type.</exception>
/// <remarks>
/// A name made of several whitespace-separated tokens is concatenated without separators
/// (<c>my attr</c> becomes <c>myattr</c>).
/// </remarks>
private static Data.Attribute ParseAttributeLine(string line, int attributeIndex)
{
    char[] separators = { ' ', '\t' };
    string declaration = line.Trim();

    // A brace marks a nominal value list. Everything before it is "<keyword> <name...>",
    // so the list itself may freely contain spaces after the commas.
    int listStart = declaration.IndexOf('{');
    string head = listStart >= 0 ? declaration.Substring(0, listStart) : declaration;
    string[] tokens = head.Split(separators, System.StringSplitOptions.RemoveEmptyEntries);

    if (listStart >= 0)
    {
        if (tokens.Length < 2)
        {
            throw new System.FormatException("Attribute declaration has no name: " + line);
        }

        string nominalName = string.Join(string.Empty, tokens, 1, tokens.Length - 1);

        int listEnd = declaration.LastIndexOf('}');
        string list = listEnd > listStart
            ? declaration.Substring(listStart + 1, listEnd - listStart - 1)
            : declaration.Substring(listStart + 1);

        // Empty entries are kept so that value positions are not shifted.
        string[] nominalValues = list.Split(',');
        for (int i = 0; i < nominalValues.Length; i++)
        {
            nominalValues[i] = nominalValues[i].Trim();
        }

        return new Data.NominalAttribute(nominalName, attributeIndex, nominalValues);
    }

    // No value list: tokens are "<keyword> <name...> <type>".
    if (tokens.Length < 3)
    {
        throw new System.FormatException("Attribute declaration needs a name and a type: " + line);
    }

    string name = string.Join(string.Empty, tokens, 1, tokens.Length - 2);
    string type = tokens[tokens.Length - 1];

    // Ordinal comparison keeps the keyword match independent of the current culture.
    bool isNumeric =
        string.Equals(type, "numeric", System.StringComparison.OrdinalIgnoreCase) ||
        string.Equals(type, "real", System.StringComparison.OrdinalIgnoreCase) ||
        string.Equals(type, "integer", System.StringComparison.OrdinalIgnoreCase);

    if (isNumeric)
    {
        return new Data.NumericAttribute(name, attributeIndex);
    }

    // Any other type token falls back to a nominal attribute built from that token.
    string[] values = type.Trim('{', '}').Split(',');
    return new Data.NominalAttribute(name, attributeIndex, values);
}
/// <summary>
/// Parses one comma-separated data line into an example.
/// </summary>
/// <param name="exampleIndex">Index handed to the constructor of the created example.</param>
/// <param name="line">The data line; the last field is the class label, the others are attribute values in column order.</param>
/// <param name="dataset">Dataset whose metadata supplies the attribute definitions and the target attribute.</param>
/// <returns>The example built from the parsed values and the label index.</returns>
/// <remarks>
/// Nominal fields are stored as the index returned by the attribute's <c>GetIndex</c>.
/// Numeric fields are parsed with the invariant culture; a <c>?</c> field is stored as <see cref="double.NaN"/>.
/// </remarks>
private static Data.Example ParseExampleLine(int exampleIndex, string line, Data.Dataset dataset)
{
    string[] parts = line.Split(',');
    int valueCount = parts.Length - 1;

    // Fields are trimmed so that "a, b, c" is read the same way as "a,b,c".
    int label = dataset.Metadata.Target.GetIndex(parts[valueCount].Trim());

    double[] values = new double[valueCount];
    for (int index = 0; index < valueCount; index++)
    {
        string field = parts[index].Trim();
        Data.NominalAttribute nominal = dataset.Metadata.Attributes[index] as Data.NominalAttribute;

        if (nominal != null)
        {
            values[index] = nominal.GetIndex(field);
        }
        else if (field == "?")
        {
            values[index] = double.NaN;
        }
        else
        {
            // Invariant culture: the file always uses '.' as the decimal separator,
            // whatever the locale of the machine reading it.
            values[index] = double.Parse(field, System.Globalization.CultureInfo.InvariantCulture);
        }
    }

    return new Data.Example(dataset.Metadata, exampleIndex, values, label);
}
/// <summary>
/// Parses one comma-separated data line into an instance that may carry several labels.
/// </summary>
/// <param name="instanceIndex">Index handed to the constructor of the created instance.</param>
/// <param name="line">The data line; the last field holds the labels separated by <c>;</c>, the others are attribute values in column order.</param>
/// <param name="dataset">Dataset whose metadata supplies the attribute definitions and the target attribute.</param>
/// <param name="skipFirstAttribute">Not read by this method.</param>
/// <returns>The instance built from the parsed values and the list of label indices.</returns>
/// <remarks>
/// Blank label entries are ignored. Nominal fields are stored as the index returned by the
/// attribute's <c>GetIndex</c>. Numeric fields are parsed with the invariant culture; a <c>?</c>
/// field is stored as <see cref="double.NaN"/>.
/// </remarks>
private static Data.Instance ParseHierarchicalinstanceLine(int instanceIndex, string line, Data.Dataset dataset, bool skipFirstAttribute)
{
    string[] parts = line.Split(',');
    int valueCount = parts.Length - 1;

    List<int> label = new List<int>();
    foreach (string target in parts[valueCount].Split(';'))
    {
        if (string.IsNullOrWhiteSpace(target))
        {
            continue;
        }

        // Trimmed so that "a; b" resolves the same labels as "a;b".
        label.Add(dataset.Metadata.Target.GetIndex(target.Trim()));
    }

    double[] values = new double[valueCount];
    for (int index = 0; index < valueCount; index++)
    {
        string field = parts[index].Trim();
        Data.NominalAttribute nominal = dataset.Metadata.Attributes[index] as Data.NominalAttribute;

        if (nominal != null)
        {
            values[index] = nominal.GetIndex(field);
        }
        else if (field == "?")
        {
            values[index] = double.NaN;
        }
        else
        {
            // Invariant culture: the file always uses '.' as the decimal separator,
            // whatever the locale of the machine reading it.
            values[index] = double.Parse(field, System.Globalization.CultureInfo.InvariantCulture);
        }
    }

    return new Data.Instance(dataset.Metadata, instanceIndex, values, label);
}
/// <summary>
/// Recomputes the per-value counts of every nominal attribute and of the target attribute,
/// then stores the number of examples in the metadata.
/// </summary>
/// <remarks>
/// Attribute counts come from <c>Filter(attributeIndex, valueIndex)</c> and target counts from
/// <c>Filter(valueIndex)</c>. The target counts do not depend on the attribute being visited,
/// so they are computed once, and also when the dataset has no nominal attribute at all.
/// </remarks>
private void UpdateValueCounts()
{
    for (int attributeIndex = 0; attributeIndex < this._metadata.Attributes.Length; attributeIndex++)
    {
        Data.NominalAttribute nominal = this._metadata.Attributes[attributeIndex] as Data.NominalAttribute;
        if (nominal == null)
        {
            // Only nominal attributes carry value counts.
            continue;
        }

        for (int valueIndex = 0; valueIndex < nominal.Values.Length; valueIndex++)
        {
            nominal.ValueCounts[valueIndex] = this.Filter(attributeIndex, valueIndex).Count;
        }
    }

    for (int valueIndex = 0; valueIndex < this._metadata.Target.Values.Length; valueIndex++)
    {
        this._metadata.Target.ValueCounts[valueIndex] = this.Filter(valueIndex).Count;
    }

    this._metadata.Size = this._examples.Length;
}
/// <summary>
/// Loads a dataset from an ARFF file.
/// </summary>
/// <param name="filePath">Path of the ARFF file to read.</param>
/// <returns>The dataset holding the parsed metadata and examples.</returns>
/// <exception cref="InvalidDataException">
/// The file has no attribute named <c>Class</c>, that attribute is not nominal, a data line
/// appears before <c>@data</c>, or there is no <c>@data</c> section.
/// </exception>
/// <remarks>
/// <para>Lines are trimmed; blank lines and lines starting with <c>%</c> are skipped.</para>
/// <para><c>@relation</c>, <c>@attribute</c> and <c>@data</c> are matched case-insensitively at the start of the line.</para>
/// <para>A <c>-weka</c> suffix (and anything after it) is removed from the relation name.</para>
/// <para>The attribute named <c>Class</c> (case-insensitive) is taken out of the attribute list and used as the target.</para>
/// <para>A data line identical to an earlier one is ignored.</para>
/// </remarks>
public static Data.Dataset LoadDatasetFromArff(string filePath)
{
    string datasetName = null;
    Data.Dataset dataset = null;
    List<Data.Attribute> attributes = new List<Data.Attribute>();
    List<Data.Example> examples = new List<Data.Example>();
    // Hash set gives constant-time duplicate detection instead of scanning a list per line.
    HashSet<string> seenLines = new HashSet<string>();
    int attributeIndex = 0;
    int exampleIndex = 0;

    // The using block releases the file handle even when parsing throws.
    using (StreamReader reader = new StreamReader(filePath))
    {
        string rawLine;
        while ((rawLine = reader.ReadLine()) != null)
        {
            string line = rawLine.Trim();

            // Only a leading '%' marks a comment; a '%' inside a value must not drop the line.
            if (line.Length == 0 || line[0] == '%')
            {
                continue;
            }

            if (line.StartsWith("@relation", System.StringComparison.OrdinalIgnoreCase))
            {
                int separator = line.IndexOfAny(new[] { ' ', '\t' });
                datasetName = separator < 0 ? string.Empty : line.Substring(separator).Trim();

                int suffix = datasetName.IndexOf("-weka", System.StringComparison.Ordinal);
                if (suffix >= 0)
                {
                    datasetName = datasetName.Substring(0, suffix);
                }
            }
            else if (line.StartsWith("@attribute", System.StringComparison.OrdinalIgnoreCase))
            {
                Data.Attribute attribute = ParseAttributeLine(line, attributeIndex);
                if (attribute != null)
                {
                    attributeIndex++;
                    attributes.Add(attribute);
                }
            }
            else if (line.StartsWith("@data", System.StringComparison.OrdinalIgnoreCase))
            {
                int targetIndex = attributes.FindIndex(
                    m => string.Equals(m.Name, "Class", System.StringComparison.OrdinalIgnoreCase));
                if (targetIndex < 0)
                {
                    throw new InvalidDataException("No attribute named 'Class' was declared in " + filePath);
                }

                Data.NominalAttribute target = attributes[targetIndex] as Data.NominalAttribute;
                if (target == null)
                {
                    throw new InvalidDataException("The 'Class' attribute must be nominal in " + filePath);
                }

                // Remove the element that was just looked up, by its position in the list.
                attributes.RemoveAt(targetIndex);
                Data.Metadata metadata = new Data.Metadata(datasetName, attributes.ToArray(), target, false);
                dataset = new Data.Dataset(metadata);
            }
            else
            {
                if (dataset == null)
                {
                    throw new InvalidDataException("Data line found before the @data section in " + filePath);
                }

                // Add returns false for a line that was already seen.
                if (!seenLines.Add(line))
                {
                    continue;
                }

                examples.Add(ParseExampleLine(exampleIndex, line, dataset));
                exampleIndex++;
            }
        }
    }

    if (dataset == null)
    {
        throw new InvalidDataException("No @data section found in " + filePath);
    }

    dataset.SetExamples(examples.ToArray());
    return dataset;
}