/// <summary>
/// Creates a new <see cref="Version"/> from its identifier, revision, metadata and content.
/// </summary>
/// <param name="versionId">The <see cref="VersionId"/>.</param>
/// <param name="revision">The revision.</param>
/// <param name="metadata">
/// The <see cref="Metadata"/>. When <c>null</c>, a <c>Data.Metadata</c> created with its
/// parameterless constructor is stored instead.
/// </param>
/// <param name="content">The content.</param>
/// <remarks>
/// Enables actions: all
/// </remarks>
public Version(VersionId versionId, string revision, Metadata metadata, Content content)
{
    VersionId = versionId;
    Revision = revision;

    // Never leave Metadata null: substitute a default instance when none was supplied.
    Metadata = metadata ?? new Data.Metadata();

    Content = content;
}
/// <summary>
/// Builds a binary dataset from this dataset. Examples whose class is
/// <paramref name="positiveClassValue"/> are copied with label index 0 and examples whose class
/// is any of <paramref name="negativeClassValues"/> are copied with label index 1. The new
/// target attribute is named "class" and has the values { "Yes", "No" }.
/// </summary>
/// <param name="positiveClassValue">Class value of this dataset's target that forms the positive examples.</param>
/// <param name="negativeClassValues">Class values of this dataset's target that form the negative examples.</param>
/// <returns>
/// A new <see cref="Dataset"/> holding the positive examples followed by the negative examples.
/// Examples are renumbered from 0 within each of the two groups.
/// </returns>
public Dataset GetBinaryFlatLabelSubDataset(string positiveClassValue, string[] negativeClassValues)
{
    string name = this._metadata.DatasetName;
    int positiveClassIndex = this._metadata.Target.GetIndex(positiveClassValue);

    // Clone the attributes so the result does not share attribute instances with this dataset.
    DataMining.Data.Attribute[] attClone = new Attribute[this.Metadata.Attributes.Length];
    for (int attributeIndex = 0; attributeIndex < this.Metadata.Attributes.Length; attributeIndex++)
    {
        attClone[attributeIndex] = this.Metadata.Attributes[attributeIndex].Clone();
    }

    Data.NominalAttribute target = new NominalAttribute("class", this._metadata.Target.Index, new string[] { "Yes", "No" });
    DataMining.Data.Metadata metadata = new Data.Metadata(name, attClone, target, false);
    Dataset dsResult = new Dataset(metadata);

    List<Example> positive = new List<Example>();
    int pcounter = 0;
    foreach (int exampleIndex in this.Filter(positiveClassIndex))
    {
        positive.Add(new Example(dsResult.Metadata, pcounter++, this[exampleIndex].Values, 0));
    }

    List<Example> negative = new List<Example>();
    int ncounter = 0;

    // De-duplicate on the SOURCE example index. The copied examples are renumbered with
    // ncounter, so (assuming Example.Index is the index passed to the constructor) their
    // Index cannot be compared with a source index without wrongly dropping examples.
    HashSet<int> addedSourceIndexes = new HashSet<int>();
    foreach (string negativeClassValue in negativeClassValues)
    {
        int negativeClassIndex = this._metadata.Target.GetIndex(negativeClassValue);
        foreach (int exampleIndex in this.Filter(negativeClassIndex))
        {
            // Add returns false when this source example was already copied.
            if (addedSourceIndexes.Add(exampleIndex))
            {
                negative.Add(new Example(dsResult.Metadata, ncounter++, this[exampleIndex].Values, 1));
            }
        }
    }

    List<Example> examples = new List<Example>();
    examples.AddRange(positive);
    examples.AddRange(negative);
    dsResult.SetExamples(examples.ToArray());

    return dsResult;
}
/// <summary>
/// Splits this dataset into one dataset per requested class value.
/// </summary>
/// <param name="classValues">
/// Class values of this dataset's target. They also become the values of the new "class"
/// target attribute shared by all returned datasets.
/// </param>
/// <returns>
/// An array parallel to <paramref name="classValues"/>: element <c>i</c> holds the examples of
/// this dataset whose class is <c>classValues[i]</c>, renumbered from 0 and labelled <c>i</c>.
/// </returns>
public Dataset[] Split(string[] classValues)
{
    Dataset[] datasets = new Dataset[classValues.Length];

    // Clone the attributes so the results do not share attribute instances with this dataset.
    DataMining.Data.Attribute[] attClone = new Attribute[this.Metadata.Attributes.Length];
    for (int attributeIndex = 0; attributeIndex < this.Metadata.Attributes.Length; attributeIndex++)
    {
        attClone[attributeIndex] = this.Metadata.Attributes[attributeIndex].Clone();
    }

    string name = this._metadata.DatasetName;
    Data.Attribute target = new Attribute("class", this._metadata.Target.Index, classValues);
    DataMining.Data.Metadata metadata = new Data.Metadata(name, attClone, target);

    for (int newClassIndex = 0; newClassIndex < classValues.Length; newClassIndex++)
    {
        // classIndex addresses the class in THIS dataset's target; newClassIndex is the
        // position in classValues and therefore the slot and label in the result. The two
        // must not be mixed: classIndex may lie outside the bounds of the result array.
        int classIndex = this._metadata.Target.GetIndex(classValues[newClassIndex]);

        Dataset subset = new Dataset(metadata);
        List<Example> examples = new List<Example>();
        int counter = 0;
        foreach (int exampleIndex in this.Filter(classIndex))
        {
            examples.Add(new Example(subset.Metadata, counter++, this[exampleIndex].Values, newClassIndex));
        }

        subset.SetExamples(examples.ToArray());
        datasets[newClassIndex] = subset;
    }

    return datasets;
}
/// <summary>
/// Loads a dataset with a hierarchical class attribute from a text file made of
/// <c>@relation</c>, <c>@attribute</c>, <c>@ontology</c> and <c>@data</c> sections.
/// </summary>
/// <param name="filePath">Path of the file to read.</param>
/// <param name="skipfirstAttribute">Passed through to <c>ParseHierarchicalExampleLine</c> for every example line.</param>
/// <returns>The loaded dataset with all parsed examples set.</returns>
/// <exception cref="InvalidDataException">The file contains no <c>@data</c> section.</exception>
public static Data.Dataset LoadHierarchicalDatasetFromTxt(string filePath, bool skipfirstAttribute)
{
    string datasetName = null;
    Data.Metadata metadata = null;
    Data.Dataset dataset = null;
    List<Data.Attribute> attributes = new List<Data.Attribute>();
    List<Data.Example> examples = new List<Data.Example>();
    List<Data.Node> nodes = new List<Data.Node>();
    int attributeIndex = 0;
    int exampleIndex = 0;
    string mode = "start";

    // Dispose the reader (and the underlying file handle) even when parsing throws.
    using (StreamReader reader = new StreamReader(filePath))
    {
        while (!reader.EndOfStream)
        {
            string line = reader.ReadLine();
            if (string.IsNullOrEmpty(line))
            {
                continue;
            }

            // Any line containing '%' anywhere is skipped.
            if (line.Contains("%"))
            {
                continue;
            }

            if (line.Contains("@relation"))
            {
                datasetName = line.Substring(line.IndexOf(' ')).Trim();

                // Drop everything from the "-weka" marker onwards, when present.
                datasetName = datasetName.Contains("-weka") ? datasetName.Substring(0, datasetName.IndexOf("-weka")) : datasetName;
            }
            else if (line.Contains("@attribute"))
            {
                Data.Attribute attribute = ParseAttributeLine(line, attributeIndex);
                if (attribute != null)
                {
                    attributeIndex++;
                    attributes.Add(attribute);
                }
            }
            else if (line.Contains("@ontology"))
            {
                mode = "ontolog";
            }
            else if (line.Contains("@data"))
            {
                // The class values are the names of all nodes except nodes[0].
                // NOTE(review): nodes[0] is presumably the hierarchy root - confirm.
                List<string> classValues = new List<string>();
                int counter = 0;
                for (int i = 1; i < nodes.Count; i++)
                {
                    Data.Node node = nodes[i];
                    node.ValueIndex = counter;
                    classValues.Add(node.Name);
                    counter++;
                }

                Data.ClassHierarchy classHierarchy = new Data.ClassHierarchy(nodes.ToArray());
                Data.HierarchicalAttribute target = new Data.HierarchicalAttribute("class", attributes.Count, classValues.ToArray(), classHierarchy);
                metadata = new Data.Metadata(datasetName, attributes.ToArray(), target, true);
                dataset = new Data.Dataset(metadata);
                mode = "data";
            }
            else if (mode == "ontolog")
            {
                // Ontology section: collect nodes, keeping the first node seen for each name.
                Data.Node node = ParseOntologyLine(line);
                if (!nodes.Exists(n => n.Name == node.Name))
                {
                    nodes.Add(node);
                }
            }
            else
            {
                Data.Example example = ParseHierarchicalExampleLine(exampleIndex, line, dataset, skipfirstAttribute);
                exampleIndex++;
                examples.Add(example);
            }
        }
    }

    // Without an @data line no dataset was ever created; fail with a descriptive error
    // instead of a NullReferenceException.
    if (dataset == null)
    {
        throw new InvalidDataException("No @data section found in file: " + filePath);
    }

    dataset.SetExamples(examples.ToArray());
    return dataset;
}
/// <summary>
/// Loads a dataset from an ARFF file. The attribute named "Class" (compared ignoring case)
/// becomes the target and is removed from the attribute list. Example lines that repeat an
/// earlier example line verbatim are skipped.
/// </summary>
/// <param name="filePath">Path of the ARFF file to read.</param>
/// <returns>The loaded dataset with all distinct example lines parsed and set.</returns>
/// <exception cref="InvalidDataException">
/// The file has no <c>@data</c> section, declares no attribute named "Class", or the "Class"
/// attribute is not a nominal attribute.
/// </exception>
public static Data.Dataset LoadDatasetFromArff(string filePath)
{
    string datasetName = null;
    Data.Metadata metadata = null;
    Data.Dataset dataset = null;
    List<Data.Attribute> attributes = new List<Data.Attribute>();
    List<Data.Example> examples = new List<Data.Example>();
    int attributeIndex = 0;
    int exampleIndex = 0;

    // Hash set gives O(1) duplicate detection; a list lookup per line is quadratic overall.
    HashSet<string> seenExampleLines = new HashSet<string>();

    // Dispose the reader (and the underlying file handle) even when parsing throws.
    using (StreamReader reader = new StreamReader(filePath))
    {
        while (!reader.EndOfStream)
        {
            string line = reader.ReadLine();
            if (string.IsNullOrEmpty(line))
            {
                continue;
            }

            // Any line containing '%' anywhere is skipped.
            if (line.Contains("%"))
            {
                continue;
            }

            if (line.Contains("@relation"))
            {
                datasetName = line.Substring(line.IndexOf(' ')).Trim();

                // Drop everything from the "-weka" marker onwards, when present.
                datasetName = datasetName.Contains("-weka") ? datasetName.Substring(0, datasetName.IndexOf("-weka")) : datasetName;
            }
            else if (line.Contains("@attribute"))
            {
                Data.Attribute attribute = ParseAttributeLine(line, attributeIndex);
                if (attribute != null)
                {
                    attributeIndex++;
                    attributes.Add(attribute);
                }
            }
            else if (line.Contains("@data"))
            {
                int targetIndex = attributes.FindIndex(m => string.Compare(m.Name, "Class", true) == 0);
                if (targetIndex < 0)
                {
                    throw new InvalidDataException("No attribute named \"Class\" found in file: " + filePath);
                }

                Data.NominalAttribute target = attributes[targetIndex] as Data.NominalAttribute;
                if (target == null)
                {
                    throw new InvalidDataException("The \"Class\" attribute is not nominal in file: " + filePath);
                }

                // Remove by the list position that was actually found rather than by the
                // attribute's own Index, so the removed element is always the target.
                attributes.RemoveAt(targetIndex);
                metadata = new Data.Metadata(datasetName, attributes.ToArray(), target, false);
                dataset = new Data.Dataset(metadata);
            }
            else
            {
                // Add returns false when an identical line was already processed.
                if (!seenExampleLines.Add(line))
                {
                    continue;
                }

                Data.Example example = ParseExampleLine(exampleIndex, line, dataset);
                exampleIndex++;
                examples.Add(example);
            }
        }
    }

    // Without an @data line no dataset was ever created; fail with a descriptive error
    // instead of a NullReferenceException.
    if (dataset == null)
    {
        throw new InvalidDataException("No @data section found in file: " + filePath);
    }

    dataset.SetExamples(examples.ToArray());
    return dataset;
}
/// <summary>
/// Constructs a <see cref="Version"/> with the given identifier, revision, metadata and content.
/// </summary>
/// <param name="versionId">The <see cref="VersionId"/>.</param>
/// <param name="revision">The revision.</param>
/// <param name="metadata">
/// The <see cref="Metadata"/>. A <c>null</c> value is replaced by a <c>Data.Metadata</c>
/// created with its parameterless constructor.
/// </param>
/// <param name="content">The content.</param>
/// <remarks>
/// Enables actions: all
/// </remarks>
public Version(VersionId versionId, string revision, Metadata metadata, Content content)
{
    VersionId = versionId;
    Revision = revision;

    // Fall back to a default metadata instance so the property is never left null.
    Metadata = metadata != null ? metadata : new Data.Metadata();

    Content = content;
}
/// <summary>
/// Parses one comma-separated example line into a <c>Data.Example</c>. The last field is the
/// class label; every other field is either parsed as a number or, when its position is listed
/// in <paramref name="nominalAttributesIndexes"/>, expanded into one 0/1 value per
/// "name::value" attribute of that nominal attribute.
/// </summary>
/// <param name="exampleIndex">Index given to the created example.</param>
/// <param name="line">The raw example line; fields are separated by ','.</param>
/// <param name="metadata">Metadata supplying the target values and the attribute list.</param>
/// <param name="nominalAttributesIndexes">Field positions in the line that hold nominal values.</param>
/// <returns>
/// The parsed example. Its label is the position of the last field within
/// <c>metadata.Target</c>, or <c>metadata.Target.Length</c> when no target value matches.
/// </returns>
private static Data.Example ParseExampleLine(int exampleIndex, string line, Data.Metadata metadata, List<int> nominalAttributesIndexes)
{
    string[] parts = line.Split(',');
    List<double> values = new List<double>();

    // Linear search for the class label; label is left at Target.Length when nothing matches.
    int label = 0;
    for (; label < metadata.Target.Length; label++)
    {
        if (metadata.Target[label] == parts[parts.Length - 1])
        {
            break;
        }
    }

    // offset walks metadata.Attributes; it advances by one per numeric field and by one per
    // expanded "name::value" attribute of a nominal field.
    int offset = 0;
    for (int index = 0; index < parts.Length - 1; index++)
    {
        string stringValue = parts[index];
        if (!nominalAttributesIndexes.Contains(index))
        {
            // "?" marks a missing value and is stored as NaN.
            double value = double.NaN;
            if (stringValue != "?")
            {
                // Parse with the invariant culture so "1.5" is read the same way on every
                // machine, including locales that use ',' as the decimal separator.
                value = double.Parse(stringValue, System.Globalization.CultureInfo.InvariantCulture);
            }

            values.Add(value);
            offset++;
        }
        else
        {
            Data.Attribute attribute = metadata.Attributes[offset];
            string attributeName = attribute.Name.Substring(0, attribute.Name.IndexOf("::"));

            // Consume the run of consecutive attributes that share the "attributeName::" prefix,
            // emitting 1 for the attribute whose value part equals the field and 0 otherwise.
            while (true)
            {
                attribute = metadata.Attributes[offset];
                if (!attribute.Name.Contains("::"))
                {
                    break;
                }

                string currentAttributeName = attribute.Name.Substring(0, attribute.Name.IndexOf("::"));
                if (currentAttributeName != attributeName)
                {
                    break;
                }

                string valueName = attribute.Name.Substring(attribute.Name.IndexOf("::") + 2);
                if (valueName == stringValue)
                {
                    values.Add(1);
                }
                else
                {
                    values.Add(0);
                }

                offset++;
                if (offset == metadata.Attributes.Length)
                {
                    break;
                }
            }
        }
    }

    Data.Example example = new Data.Example(metadata, exampleIndex, values.ToArray(), label);
    return example;
}
/// <summary>
/// Loads a dataset from an ARFF file. An <c>@attribute</c> line containing "class" (ignoring
/// case) supplies the target values; every other <c>@attribute</c> line may expand into one or
/// more attributes. Example lines that repeat an earlier example line verbatim are skipped.
/// </summary>
/// <param name="filePath">Path of the ARFF file to read.</param>
/// <returns>The loaded dataset with all distinct example lines parsed and set.</returns>
/// <exception cref="InvalidDataException">
/// The file has no <c>@data</c> section, or an example line appears before it.
/// </exception>
public static Data.Dataset LoadDatasetFromArff(string filePath)
{
    string datasetName = null;
    Data.Metadata metadata = null;
    Data.Dataset dataset = null;
    List<Data.Attribute> attributes = new List<Data.Attribute>();
    List<Data.Example> examples = new List<Data.Example>();
    string[] target = null;
    int attributeIndex = 0;
    int exampleIndex = 0;

    // Positions (counted over non-class @attribute lines) of attributes treated as nominal.
    List<int> nominalAttributesIndexes = new List<int>();
    int aindex = 0;

    // Hash set gives O(1) duplicate detection; a list lookup per line is quadratic overall.
    HashSet<string> seenExampleLines = new HashSet<string>();

    // Dispose the reader (and the underlying file handle) even when parsing throws.
    using (StreamReader reader = new StreamReader(filePath))
    {
        while (!reader.EndOfStream)
        {
            string line = reader.ReadLine();
            if (string.IsNullOrEmpty(line))
            {
                continue;
            }

            // Any line containing '%' anywhere is skipped.
            if (line.Contains("%"))
            {
                continue;
            }

            if (line.Contains("@relation"))
            {
                datasetName = line.Substring(line.IndexOf(' ')).Trim();

                // Drop everything from the "-weka" marker onwards, when present.
                datasetName = datasetName.Contains("-weka") ? datasetName.Substring(0, datasetName.IndexOf("-weka")) : datasetName;
            }
            else if (line.Contains("@attribute"))
            {
                if (line.ToUpper().Contains("CLASS"))
                {
                    target = ParseClassAttributeLine(line);
                }
                else
                {
                    Data.Attribute[] currentAttributes = ParseAttributeLine(line, attributeIndex);
                    attributeIndex += currentAttributes.Length;
                    attributes.AddRange(currentAttributes);

                    // The line is recorded as nominal when it expanded into several attributes,
                    // or when the single resulting attribute's name does not occur in the line.
                    if (currentAttributes.Length > 1 || !line.Contains(currentAttributes[0].Name))
                    {
                        nominalAttributesIndexes.Add(aindex);
                    }

                    aindex++;
                }
            }
            else if (line.Contains("@data"))
            {
                metadata = new Data.Metadata(datasetName, attributes.ToArray(), target);
                dataset = new Data.Dataset(metadata);
            }
            else
            {
                // An example line needs the metadata built by @data; fail with a descriptive
                // error instead of a NullReferenceException on dataset.Metadata.
                if (dataset == null)
                {
                    throw new InvalidDataException("Example line found before the @data section in file: " + filePath);
                }

                // Add returns false when an identical line was already processed.
                if (!seenExampleLines.Add(line))
                {
                    continue;
                }

                Data.Example example = ParseExampleLine(exampleIndex, line, dataset.Metadata, nominalAttributesIndexes);
                exampleIndex++;
                examples.Add(example);
            }
        }
    }

    // Without an @data line no dataset was ever created.
    if (dataset == null)
    {
        throw new InvalidDataException("No @data section found in file: " + filePath);
    }

    dataset.SetExamples(examples.ToArray());
    return dataset;
}