/// <summary>
 /// Creates a <see cref="Version"/> from its identifier, revision, metadata and content.
 /// </summary>
 /// <param name="versionId">The <see cref="VersionId"/> stored on the instance.</param>
 /// <param name="revision">The revision string stored on the instance.</param>
 /// <param name="metadata">
 /// The <see cref="Metadata"/> stored on the instance; when <c>null</c>, a new
 /// <c>Data.Metadata</c> built with its parameterless constructor is stored instead.
 /// </param>
 /// <param name="content">The content stored on the instance.</param>
 /// <remarks>
 /// Enables actions: all
 /// </remarks>
 public Version(VersionId versionId, string revision, Metadata metadata, Content content)
 {
     VersionId = versionId;
     Revision  = revision;

     // Never leave Metadata null: fall back to a freshly constructed instance.
     Metadata  = metadata ?? new Data.Metadata();

     Content   = content;
 }
Example #2
0
        /// <summary>
        /// Builds a two-class dataset (target values "Yes" / "No") from this dataset.
        /// Examples returned by <c>Filter</c> for <paramref name="positiveClassValue"/> are added with
        /// label 0; examples returned for any of <paramref name="negativeClassValues"/> are added with label 1.
        /// </summary>
        /// <param name="positiveClassValue">Target value whose examples form the positive ("Yes") class.</param>
        /// <param name="negativeClassValues">Target values whose examples form the negative ("No") class.</param>
        /// <returns>
        /// A new <see cref="Dataset"/> over cloned attributes, holding the positive examples followed by
        /// the negative examples. Each source example is added to the negative group at most once.
        /// </returns>
        public Dataset GetBinaryFlatLabelSubDataset(string positiveClassValue, string[] negativeClassValues)
        {
            string name               = this._metadata.DatasetName;
            int    positiveClassIndex = this._metadata.Target.GetIndex(positiveClassValue);

            // Clone the attributes so the result does not share attribute objects with this dataset.
            DataMining.Data.Attribute[] attClone = new Attribute[this.Metadata.Attributes.Length];
            for (int attributeIndex = 0; attributeIndex < attClone.Length; attributeIndex++)
            {
                attClone[attributeIndex] = this.Metadata.Attributes[attributeIndex].Clone();
            }

            Data.NominalAttribute    target   = new NominalAttribute("class", this._metadata.Target.Index, new string[] { "Yes", "No" });
            DataMining.Data.Metadata metadata = new Data.Metadata(name, attClone, target, false);

            Dataset dsResult = new Dataset(metadata);

            // Positive examples come first, numbered from 0, with label 0.
            List <Example> examples = new List <Example>();
            int            pcounter = 0;

            foreach (int exampleIndex in this.Filter(positiveClassIndex))
            {
                examples.Add(new Example(dsResult.Metadata, pcounter++, this[exampleIndex].Values, 0));
            }

            // Negative examples follow, numbered from 0 again (as before), with label 1.
            // A source example may be returned for several negative class values, so the
            // SOURCE indices already added are tracked here. The previous check compared the
            // source index against the new examples' own counters, which could wrongly drop
            // distinct examples.
            HashSet <int> addedSourceIndexes = new HashSet <int>();
            int           ncounter           = 0;

            foreach (string negativeClassValue in negativeClassValues)
            {
                int negativeClassIndex = this._metadata.Target.GetIndex(negativeClassValue);
                foreach (int exampleIndex in this.Filter(negativeClassIndex))
                {
                    // HashSet.Add returns false when the source index was already added.
                    if (addedSourceIndexes.Add(exampleIndex))
                    {
                        examples.Add(new Example(dsResult.Metadata, ncounter++, this[exampleIndex].Values, 1));
                    }
                }
            }

            dsResult.SetExamples(examples.ToArray());

            return(dsResult);
        }
Example #3
0
        /// <summary>
        /// Splits this dataset into one dataset per entry of <paramref name="classValues"/>.
        /// </summary>
        /// <param name="classValues">
        /// Target values to split on. They also become the values of the new "class" target attribute,
        /// so the label of every example in the i-th result is i.
        /// </param>
        /// <returns>
        /// An array with the same length and order as <paramref name="classValues"/>; element i holds the
        /// examples that <c>Filter</c> returns for <c>classValues[i]</c>. All results share one metadata object.
        /// </returns>
        public Dataset[] Split(string [] classValues)
        {
            Dataset[] datasets = new Dataset[classValues.Length];

            // Clone the attributes so the results do not share attribute objects with this dataset.
            DataMining.Data.Attribute[] attClone = new Attribute[this.Metadata.Attributes.Length];
            for (int attributeIndex = 0; attributeIndex < attClone.Length; attributeIndex++)
            {
                attClone[attributeIndex] = this.Metadata.Attributes[attributeIndex].Clone();
            }
            string name = this._metadata.DatasetName;

            Data.Attribute target = new Attribute("class", this._metadata.Target.Index, classValues);

            DataMining.Data.Metadata metadata = new Data.Metadata(name, attClone, target);

            // newClassIndex is both the slot in the result array and the label in the new target.
            // classIndex is the value's index in THIS dataset's target and is only used for filtering.
            // (Previously the dataset was stored at classIndex but read back at newClassIndex, and the
            // wrong counter was incremented, so the output slot never advanced.)
            for (int newClassIndex = 0; newClassIndex < classValues.Length; newClassIndex++)
            {
                int classIndex = this._metadata.Target.GetIndex(classValues[newClassIndex]);

                Dataset subset = new Dataset(metadata);

                List <Example> examples = new List <Example>();
                int            counter  = 0;

                foreach (int exampleIndex in this.Filter(classIndex))
                {
                    examples.Add(new Example(subset.Metadata, counter++, this[exampleIndex].Values, newClassIndex));
                }

                subset.SetExamples(examples.ToArray());
                datasets[newClassIndex] = subset;
            }

            return(datasets);
        }
Example #4
0
        /// <summary>
        /// Reads a hierarchical dataset from a text file made of <c>@relation</c>, <c>@attribute</c>,
        /// <c>@ontology</c> and <c>@data</c> sections.
        /// </summary>
        /// <param name="filePath">Path of the file to read.</param>
        /// <param name="skipfirstAttribute">Passed through unchanged to <c>ParseHierarchicalExampleLine</c>.</param>
        /// <returns>The dataset created at the <c>@data</c> marker, populated with every parsed example.</returns>
        /// <exception cref="InvalidDataException">The file does not contain an <c>@data</c> marker.</exception>
        /// <remarks>
        /// Empty lines and any line containing <c>%</c> (anywhere in the line) are skipped.
        /// </remarks>
        public static Data.Dataset LoadHierarchicalDatasetFromTxt(string filePath, bool skipfirstAttribute)
        {
            string datasetName = null;
            Data.Metadata metadata = null;
            Data.Dataset dataset = null;

            List<Data.Attribute> attributes = new List<Data.Attribute>();
            List<Data.Example> examples = new List<Data.Example>();
            List<Data.Node> nodes = new List<Data.Node>();

            int attributeIndex = 0;
            int exampleIndex = 0;

            // Current section of the file: "start" -> "ontolog" (after @ontology) -> "data" (after @data).
            string mode = "start";

            // The using statement guarantees the file handle is released even when parsing throws.
            using (StreamReader reader = new StreamReader(filePath))
            {
                while (!reader.EndOfStream)
                {
                    string line = reader.ReadLine();
                    if (string.IsNullOrEmpty(line))
                    {
                        continue;
                    }

                    if (line.Contains("%"))
                    {
                        continue;
                    }

                    if (line.Contains("@relation"))
                    {
                        // Name is everything after the first space, minus any "-weka..." suffix.
                        datasetName = line.Substring(line.IndexOf(' ')).Trim();
                        datasetName = datasetName.Contains("-weka") ? datasetName.Substring(0, datasetName.IndexOf("-weka")) : datasetName;
                    }
                    else if (line.Contains("@attribute"))
                    {
                        Data.Attribute attribute = ParseAttributeLine(line, attributeIndex);

                        // A null result means the line produced no attribute; the index is not consumed.
                        if (attribute != null)
                        {
                            attributeIndex++;
                            attributes.Add(attribute);
                        }
                    }
                    else if (line.Contains("@ontology"))
                    {
                        mode = "ontolog";
                    }
                    else if (line.Contains("@data"))
                    {
                        List<string> classValues = new List<string>();

                        // Class values are taken from every node except nodes[0]
                        // (presumably the hierarchy root - TODO confirm), numbered from 0.
                        int counter = 0;
                        for (int i = 1; i < nodes.Count; i++)
                        {
                            Data.Node node = nodes[i];
                            node.ValueIndex = counter;
                            classValues.Add(node.Name);
                            counter++;
                        }

                        Data.ClassHierarchy classHierarchy = new Data.ClassHierarchy(nodes.ToArray());

                        Data.HierarchicalAttribute target = new Data.HierarchicalAttribute("class", attributes.Count, classValues.ToArray(), classHierarchy);
                        metadata = new Data.Metadata(datasetName, attributes.ToArray(), target, true);
                        dataset = new Data.Dataset(metadata);

                        mode = "data";
                    }
                    else if (mode == "ontolog")
                    {
                        // Ontology section: keep the first node seen for each name.
                        Data.Node node = ParseOntologyLine(line);
                        if (!nodes.Exists(n => n.Name == node.Name))
                        {
                            nodes.Add(node);
                        }
                    }
                    else
                    {
                        // Any other line is treated as an example row.
                        Data.Example example = ParseHierarchicalExampleLine(exampleIndex, line, dataset, skipfirstAttribute);
                        exampleIndex++;
                        examples.Add(example);
                    }
                }
            }

            // Without an @data marker no dataset was ever created; fail with a clear message
            // instead of a NullReferenceException.
            if (dataset == null)
            {
                throw new InvalidDataException("The file '" + filePath + "' does not contain an @data section.");
            }

            dataset.SetExamples(examples.ToArray());

            return dataset;
        }
Example #5
0
        /// <summary>
        /// Reads a dataset from an ARFF file whose target is the attribute named "class"
        /// (compared case-insensitively).
        /// </summary>
        /// <param name="filePath">Path of the ARFF file to read.</param>
        /// <returns>The dataset created at the <c>@data</c> marker, populated with the parsed examples.</returns>
        /// <exception cref="InvalidDataException">
        /// The file has no <c>@data</c> marker, declares no "class" attribute before it, or the
        /// "class" attribute is not a <c>Data.NominalAttribute</c>.
        /// </exception>
        /// <remarks>
        /// Empty lines and any line containing <c>%</c> are skipped. A data line whose text is
        /// identical to an earlier data line is ignored.
        /// </remarks>
        public static Data.Dataset LoadDatasetFromArff(string filePath)
        {
            string datasetName = null;
            Data.Metadata metadata = null;
            Data.Dataset dataset = null;

            List<Data.Attribute> attributes = new List<Data.Attribute>();
            List<Data.Example> examples = new List<Data.Example>();
            int attributeIndex = 0;
            int exampleIndex = 0;

            // Data lines already parsed; a set keeps the duplicate check O(1) per line.
            HashSet<string> exampleLines = new HashSet<string>();

            // The using statement guarantees the file handle is released even when parsing throws.
            using (StreamReader reader = new StreamReader(filePath))
            {
                while (!reader.EndOfStream)
                {
                    string line = reader.ReadLine();
                    if (string.IsNullOrEmpty(line))
                    {
                        continue;
                    }

                    if (line.Contains("%"))
                    {
                        continue;
                    }

                    if (line.Contains("@relation"))
                    {
                        // Name is everything after the first space, minus any "-weka..." suffix.
                        datasetName = line.Substring(line.IndexOf(' ')).Trim();
                        datasetName = datasetName.Contains("-weka") ? datasetName.Substring(0, datasetName.IndexOf("-weka")) : datasetName;
                    }
                    else if (line.Contains("@attribute"))
                    {
                        Data.Attribute attribute = ParseAttributeLine(line, attributeIndex);

                        // A null result means the line produced no attribute; the index is not consumed.
                        if (attribute != null)
                        {
                            attributeIndex++;
                            attributes.Add(attribute);
                        }
                    }
                    else if (line.Contains("@data"))
                    {
                        int targetIndex = attributes.FindIndex(m => string.Compare(m.Name, "Class", true) == 0);
                        if (targetIndex < 0)
                        {
                            throw new InvalidDataException("The file '" + filePath + "' does not declare a class attribute.");
                        }

                        Data.NominalAttribute target = attributes[targetIndex] as Data.NominalAttribute;
                        if (target == null)
                        {
                            throw new InvalidDataException("The class attribute in '" + filePath + "' is not nominal.");
                        }

                        // Remove the target from the predictor list at the position where it was found.
                        attributes.RemoveAt(targetIndex);
                        metadata = new Data.Metadata(datasetName, attributes.ToArray(), target, false);
                        dataset = new Data.Dataset(metadata);
                    }
                    else
                    {
                        // HashSet.Add returns false for a line that was already seen.
                        if (!exampleLines.Add(line))
                        {
                            continue;
                        }

                        Data.Example example = ParseExampleLine(exampleIndex, line, dataset);
                        exampleIndex++;
                        examples.Add(example);
                    }
                }
            }

            // Without an @data marker no dataset was ever created; fail with a clear message
            // instead of a NullReferenceException.
            if (dataset == null)
            {
                throw new InvalidDataException("The file '" + filePath + "' does not contain an @data section.");
            }

            dataset.SetExamples(examples.ToArray());

            return dataset;
        }
Example #6
0
 /// <summary>
 /// Constructs a <see cref="Version"/> and populates all of its parts.
 /// </summary>
 /// <param name="versionId">The <see cref="VersionId"/> to store.</param>
 /// <param name="revision">The revision to store.</param>
 /// <param name="metadata">
 /// The <see cref="Metadata"/> to store. A <c>null</c> argument is replaced by a new
 /// <c>Data.Metadata</c> created with its parameterless constructor.
 /// </param>
 /// <param name="content">The content to store.</param>
 /// <remarks>
 /// Enables actions: all
 /// </remarks>
 public Version(VersionId versionId, string revision, Metadata metadata, Content content)
 {
     VersionId = versionId;
     Revision = revision;

     if (metadata == null)
     {
         // No metadata supplied: store a default-constructed instance instead of null.
         Metadata = new Data.Metadata();
     }
     else
     {
         Metadata = metadata;
     }

     Content = content;
 }
Example #7
0
        /// <summary>
        /// Converts one comma-separated data line into a <c>Data.Example</c>.
        /// </summary>
        /// <param name="exampleIndex">Index passed to the created example.</param>
        /// <param name="line">The data line; the last comma-separated field is the class label.</param>
        /// <param name="metadata">Metadata providing the target values and the attribute list.</param>
        /// <param name="nominalAttributesIndexes">
        /// Column positions (0-based, within <paramref name="line"/>) that hold nominal values.
        /// </param>
        /// <returns>The example built from the parsed values and the resolved label.</returns>
        /// <remarks>
        /// A non-nominal column yields one value: the parsed number, or <c>NaN</c> for "?".
        /// A nominal column yields one 0/1 value per consecutive metadata attribute that is named
        /// <c>name::value</c> with the same <c>name</c> prefix; the value is 1 where <c>value</c>
        /// equals the column text. Numbers are parsed with the invariant culture, so "." is the
        /// decimal separator whatever the machine locale is.
        /// If the label text matches no target value, the label equals <c>metadata.Target.Length</c>.
        /// </remarks>
        private static Data.Example ParseExampleLine(int exampleIndex, string line, Data.Metadata metadata, List <int> nominalAttributesIndexes)
        {
            const string separator = "::";

            string []     parts  = line.Split(',');
            List <double> values = new List <double>();

            // Resolve the label: position of the last field among the target values.
            string labelText = parts[parts.Length - 1];
            int    label     = 0;

            for (; label < metadata.Target.Length; label++)
            {
                if (metadata.Target[label] == labelText)
                {
                    break;
                }
            }

            // offset walks metadata.Attributes; it advances by one per numeric column and by
            // the number of expanded "name::value" attributes per nominal column.
            int offset = 0;

            for (int index = 0; index < parts.Length - 1; index++)
            {
                string stringValue = parts[index];

                if (!nominalAttributesIndexes.Contains(index))
                {
                    double value = double.NaN;
                    if (stringValue != "?")
                    {
                        // Data files use '.' as decimal separator; do not depend on the current culture.
                        value = double.Parse(stringValue, System.Globalization.CultureInfo.InvariantCulture);
                    }
                    values.Add(value);
                    offset++;
                }
                else
                {
                    Data.Attribute attribute     = metadata.Attributes[offset];
                    string         attributeName = attribute.Name.Substring(0, attribute.Name.IndexOf(separator, System.StringComparison.Ordinal));

                    while (true)
                    {
                        attribute = metadata.Attributes[offset];

                        // Stop at the first attribute that is not an expanded nominal value...
                        int separatorPosition = attribute.Name.IndexOf(separator, System.StringComparison.Ordinal);
                        if (separatorPosition < 0)
                        {
                            break;
                        }

                        // ...or that belongs to a different nominal attribute.
                        string currentAttributeName = attribute.Name.Substring(0, separatorPosition);
                        if (currentAttributeName != attributeName)
                        {
                            break;
                        }

                        string valueName = attribute.Name.Substring(separatorPosition + separator.Length);
                        values.Add(valueName == stringValue ? 1 : 0);
                        offset++;

                        if (offset == metadata.Attributes.Length)
                        {
                            break;
                        }
                    }
                }
            }

            Data.Example example = new Data.Example(metadata, exampleIndex, values.ToArray(), label);
            return(example);
        }
Example #8
0
        /// <summary>
        /// Reads a dataset from an ARFF file, expanding attributes through <c>ParseAttributeLine</c>
        /// and taking the target values from the attribute line that contains "class" (any casing).
        /// </summary>
        /// <param name="filePath">Path of the ARFF file to read.</param>
        /// <returns>The dataset created at the <c>@data</c> marker, populated with the parsed examples.</returns>
        /// <exception cref="InvalidDataException">The file does not contain an <c>@data</c> marker.</exception>
        /// <remarks>
        /// Empty lines and any line containing <c>%</c> are skipped. A data line whose text is
        /// identical to an earlier data line is ignored.
        /// </remarks>
        public static Data.Dataset LoadDatasetFromArff(string filePath)
        {
            string datasetName = null;

            Data.Metadata metadata = null;
            Data.Dataset  dataset  = null;

            List <Data.Attribute> attributes = new List <Data.Attribute>();
            List <Data.Example>   examples   = new List <Data.Example>();

            string[] target = null;

            int attributeIndex = 0;
            int exampleIndex   = 0;

            // Positions (counted over non-class @attribute lines) of the attributes treated as nominal.
            List <int> nominalAttributesIndexes = new List <int>();
            int        aindex = 0;

            // Data lines already parsed; a set keeps the duplicate check O(1) per line.
            HashSet <string> exampleLines = new HashSet <string>();

            // The using statement guarantees the file handle is released even when parsing throws.
            using (StreamReader reader = new StreamReader(filePath))
            {
                while (!reader.EndOfStream)
                {
                    string line = reader.ReadLine();
                    if (string.IsNullOrEmpty(line))
                    {
                        continue;
                    }

                    if (line.Contains("%"))
                    {
                        continue;
                    }

                    if (line.Contains("@relation"))
                    {
                        // Name is everything after the first space, minus any "-weka..." suffix.
                        datasetName = line.Substring(line.IndexOf(' ')).Trim();
                        datasetName = datasetName.Contains("-weka") ? datasetName.Substring(0, datasetName.IndexOf("-weka")) : datasetName;
                    }
                    else if (line.Contains("@attribute"))
                    {
                        if (line.ToUpper().Contains("CLASS"))
                        {
                            target = ParseClassAttributeLine(line);
                        }
                        else
                        {
                            Data.Attribute [] currentAttributes = ParseAttributeLine(line, attributeIndex);
                            attributeIndex += currentAttributes.Length;
                            attributes.AddRange(currentAttributes);

                            // The line is recorded as nominal when it expanded into several attributes,
                            // or when the single resulting attribute's name does not appear in the line.
                            if (currentAttributes.Length > 1 || !line.Contains(currentAttributes[0].Name))
                            {
                                nominalAttributesIndexes.Add(aindex);
                            }
                            aindex++;
                        }
                    }
                    else if (line.Contains("@data"))
                    {
                        metadata = new Data.Metadata(datasetName, attributes.ToArray(), target);
                        dataset  = new Data.Dataset(metadata);
                    }
                    else
                    {
                        // HashSet.Add returns false for a line that was already seen.
                        if (!exampleLines.Add(line))
                        {
                            continue;
                        }

                        Data.Example example = ParseExampleLine(exampleIndex, line, dataset.Metadata, nominalAttributesIndexes);
                        exampleIndex++;
                        examples.Add(example);
                    }
                }
            }

            // Without an @data marker no dataset was ever created; fail with a clear message
            // instead of a NullReferenceException.
            if (dataset == null)
            {
                throw new InvalidDataException("The file '" + filePath + "' does not contain an @data section.");
            }

            dataset.SetExamples(examples.ToArray());

            return(dataset);
        }