Ejemplo n.º 1
0
        /// <summary>
        /// Parses a single ARFF <c>@attribute</c> declaration into an attribute object.
        /// </summary>
        /// <param name="line">The raw declaration, e.g. <c>@attribute outlook {sunny, rainy}</c>.</param>
        /// <param name="attributeIndex">Index passed through to the attribute constructor.</param>
        /// <returns>
        /// A <c>Data.NumericAttribute</c> when the declared type is <c>numeric</c>, <c>real</c> or
        /// <c>integer</c>; otherwise a <c>Data.NominalAttribute</c> built from the comma-separated
        /// value list.
        /// </returns>
        /// <exception cref="System.FormatException">
        /// The line does not contain a keyword, a name and a type specification.
        /// </exception>
        private static Data.Attribute ParseAttributeLine(string line, int attributeIndex)
        {
            char[] separators = { ' ', '\t' };
            string declaration = line.Trim();

            // Split the declaration into "<keyword> <name>" and "<type>". A nominal type starts at
            // the first '{' and may itself contain blanks ("{a, b, c}"), so it must not be
            // tokenised on whitespace; any other type is simply the last token.
            string header;
            string typeSpec;
            int braceStart = declaration.IndexOf('{');
            if (braceStart >= 0)
            {
                header = declaration.Substring(0, braceStart);
                typeSpec = declaration.Substring(braceStart);
            }
            else
            {
                int typeStart = declaration.LastIndexOfAny(separators);
                header = typeStart < 0 ? declaration : declaration.Substring(0, typeStart);
                typeSpec = typeStart < 0 ? string.Empty : declaration.Substring(typeStart + 1);
            }

            string[] headerTokens = header.Split(separators, System.StringSplitOptions.RemoveEmptyEntries);
            if (headerTokens.Length < 2)
            {
                throw new System.FormatException("Malformed attribute declaration: '" + line + "'.");
            }

            // Everything after the keyword forms the name. Tokens are concatenated without a
            // separator, which is how multi-token names have always been assembled here.
            string name = string.Join(string.Empty, headerTokens, 1, headerTokens.Length - 1);

            // ARFF treats "real" and "integer" as numeric. Ordinal comparison keeps the keyword
            // check independent of the current culture's casing rules.
            bool isNumeric =
                string.Equals(typeSpec, "numeric", System.StringComparison.OrdinalIgnoreCase) ||
                string.Equals(typeSpec, "real", System.StringComparison.OrdinalIgnoreCase) ||
                string.Equals(typeSpec, "integer", System.StringComparison.OrdinalIgnoreCase);

            if (isNumeric)
            {
                return new Data.NumericAttribute(name, attributeIndex);
            }

            string[] values = typeSpec.Trim('{', '}').Split(',');
            for (int i = 0; i < values.Length; i++)
            {
                values[i] = values[i].Trim();
            }

            return new Data.NominalAttribute(name, attributeIndex, values);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Converts one comma-separated data row into a <c>Data.Example</c>.
        /// </summary>
        /// <remarks>
        /// The last field is looked up in the dataset's target attribute to obtain the label. Every
        /// preceding field is matched by position against <c>dataset.Metadata.Attributes</c>:
        /// nominal fields are stored as the index returned by the attribute's <c>GetIndex</c>,
        /// all other fields are parsed as doubles, with <c>?</c> stored as <c>double.NaN</c>.
        /// </remarks>
        /// <param name="exampleIndex">Index passed through to the example constructor.</param>
        /// <param name="line">The raw data row.</param>
        /// <param name="dataset">Dataset whose metadata describes the columns.</param>
        /// <returns>The parsed example.</returns>
        private static Data.Example ParseExampleLine(int exampleIndex, string line, Data.Dataset dataset)
        {
            string[] fields = line.Split(',');

            // Tolerate blanks around separators ("1.0, ?, yes") so that the missing-value marker
            // and nominal lookups still match.
            for (int i = 0; i < fields.Length; i++)
            {
                fields[i] = fields[i].Trim();
            }

            int label = dataset.Metadata.Target.GetIndex(fields[fields.Length - 1]);

            double[] values = new double[fields.Length - 1];
            for (int index = 0; index < values.Length; index++)
            {
                Data.NominalAttribute nominal = dataset.Metadata.Attributes[index] as Data.NominalAttribute;
                if (nominal != null)
                {
                    values[index] = nominal.GetIndex(fields[index]);
                }
                else if (fields[index] == "?")
                {
                    values[index] = double.NaN;
                }
                else
                {
                    // The file is machine-readable data with '.' as the decimal separator; parsing
                    // with the current culture would misread it on comma-decimal locales.
                    values[index] = double.Parse(fields[index], System.Globalization.CultureInfo.InvariantCulture);
                }
            }

            return new Data.Example(dataset.Metadata, exampleIndex, values, label);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Converts one comma-separated data row into a <c>Data.Instance</c> that may carry
        /// several labels.
        /// </summary>
        /// <remarks>
        /// The last field holds the labels separated by <c>;</c>; blank entries are ignored and
        /// each remaining entry is looked up in the dataset's target attribute. Every preceding
        /// field is matched by position against <c>dataset.Metadata.Attributes</c>: nominal
        /// fields are stored as the index returned by <c>GetIndex</c>, all other fields are parsed
        /// as doubles, with <c>?</c> stored as <c>double.NaN</c>.
        /// </remarks>
        /// <param name="instanceIndex">Index passed through to the instance constructor.</param>
        /// <param name="line">The raw data row.</param>
        /// <param name="dataset">Dataset whose metadata describes the columns.</param>
        /// <param name="skipFirstAttribute">
        /// Not read by this method. NOTE(review): confirm with callers whether the first column
        /// was meant to be skipped when this is <c>true</c>.
        /// </param>
        /// <returns>The parsed instance.</returns>
        private static Data.Instance ParseHierarchicalinstanceLine(int instanceIndex, string line, Data.Dataset dataset, bool skipFirstAttribute)
        {
            string[] fields = line.Split(',');

            // Tolerate blanks around separators so that the missing-value marker and nominal
            // lookups still match.
            for (int i = 0; i < fields.Length; i++)
            {
                fields[i] = fields[i].Trim();
            }

            List<int> label = new List<int>();
            foreach (string target in fields[fields.Length - 1].Split(';'))
            {
                if (string.IsNullOrWhiteSpace(target))
                {
                    continue;
                }

                label.Add(dataset.Metadata.Target.GetIndex(target.Trim()));
            }

            double[] values = new double[fields.Length - 1];
            for (int index = 0; index < values.Length; index++)
            {
                Data.NominalAttribute nominal = dataset.Metadata.Attributes[index] as Data.NominalAttribute;
                if (nominal != null)
                {
                    values[index] = nominal.GetIndex(fields[index]);
                }
                else if (fields[index] == "?")
                {
                    values[index] = double.NaN;
                }
                else
                {
                    // The file is machine-readable data with '.' as the decimal separator; parsing
                    // with the current culture would misread it on comma-decimal locales.
                    values[index] = double.Parse(fields[index], System.Globalization.CultureInfo.InvariantCulture);
                }
            }

            return new Data.Instance(dataset.Metadata, instanceIndex, values, label);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Recomputes the per-value occurrence counts stored on every nominal attribute and on
        /// the target attribute, then records the number of examples as the metadata size.
        /// </summary>
        private void UpdateValueCounts()
        {
            for (int attributeIndex = 0; attributeIndex < this._metadata.Attributes.Length; attributeIndex++)
            {
                Data.NominalAttribute currentAttribute = this._metadata.Attributes[attributeIndex] as NominalAttribute;
                if (currentAttribute == null)
                {
                    // Only nominal attributes carry value counts.
                    continue;
                }

                for (int valueIndex = 0; valueIndex < currentAttribute.Values.Length; valueIndex++)
                {
                    currentAttribute.ValueCounts[valueIndex] = this.Filter(attributeIndex, valueIndex).Count;
                }
            }

            // The target counts do not depend on any particular attribute, so they are computed
            // exactly once here. Previously this loop sat inside the nominal-attribute branch: it
            // was repeated for every nominal attribute and skipped entirely when the dataset had
            // only numeric attributes.
            if (this._metadata.Target != null)
            {
                for (int valueIndex = 0; valueIndex < this._metadata.Target.Values.Length; valueIndex++)
                {
                    this._metadata.Target.ValueCounts[valueIndex] = this.Filter(valueIndex).Count;
                }
            }

            this._metadata.Size = this._examples.Length;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Reads an ARFF file and builds a dataset from its header and data rows.
        /// </summary>
        /// <remarks>
        /// Lines containing <c>%</c> are skipped. The attribute named <c>Class</c> (case-insensitive)
        /// is removed from the attribute list and used as the target. Data rows that are exact
        /// textual duplicates of an earlier row are ignored.
        /// </remarks>
        /// <param name="filePath">Path of the ARFF file to read.</param>
        /// <returns>The loaded dataset with its examples set.</returns>
        /// <exception cref="System.IO.InvalidDataException">
        /// The file has no nominal <c>Class</c> attribute, has data rows before <c>@data</c>, or
        /// has no <c>@data</c> section at all.
        /// </exception>
        public static Data.Dataset LoadDatasetFromArff(string filePath)
        {
            string datasetName = null;
            Data.Dataset dataset = null;

            List<Data.Attribute> attributes = new List<Data.Attribute>();
            List<Data.Example> examples = new List<Data.Example>();
            int attributeIndex = 0;
            int exampleIndex = 0;

            // Hash-based membership keeps duplicate-row detection linear in the number of rows.
            HashSet<string> seenLines = new HashSet<string>();

            // The using block guarantees the file handle is released even when parsing throws.
            using (StreamReader reader = new StreamReader(filePath))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    if (string.IsNullOrEmpty(line) || line.Contains("%"))
                    {
                        continue;
                    }

                    // ARFF keywords are case-insensitive, so "@RELATION", "@ATTRIBUTE" and "@DATA"
                    // must be recognised as well as their lowercase forms.
                    if (line.IndexOf("@relation", System.StringComparison.OrdinalIgnoreCase) >= 0)
                    {
                        datasetName = line.Substring(line.IndexOf(' ')).Trim();

                        // Drop the filter suffix that follows "-weka" in the relation name.
                        int suffixStart = datasetName.IndexOf("-weka");
                        if (suffixStart >= 0)
                        {
                            datasetName = datasetName.Substring(0, suffixStart);
                        }
                    }
                    else if (line.IndexOf("@attribute", System.StringComparison.OrdinalIgnoreCase) >= 0)
                    {
                        Data.Attribute attribute = ParseAttributeLine(line, attributeIndex);
                        if (attribute != null)
                        {
                            attributeIndex++;
                            attributes.Add(attribute);
                        }
                    }
                    else if (line.IndexOf("@data", System.StringComparison.OrdinalIgnoreCase) >= 0)
                    {
                        int targetIndex = attributes.FindIndex(m => string.Compare(m.Name, "Class", true) == 0);
                        if (targetIndex < 0)
                        {
                            throw new System.IO.InvalidDataException(
                                "ARFF file '" + filePath + "' does not declare a 'Class' attribute.");
                        }

                        Data.NominalAttribute target = attributes[targetIndex] as Data.NominalAttribute;
                        if (target == null)
                        {
                            throw new System.IO.InvalidDataException(
                                "The 'Class' attribute in '" + filePath + "' must be nominal.");
                        }

                        // Remove by the position that was just found, so the element taken out of
                        // the list is always the target itself.
                        attributes.RemoveAt(targetIndex);

                        Data.Metadata metadata = new Data.Metadata(datasetName, attributes.ToArray(), target, false);
                        dataset = new Data.Dataset(metadata);
                    }
                    else
                    {
                        if (dataset == null)
                        {
                            throw new System.IO.InvalidDataException(
                                "Unexpected content before the @data section in '" + filePath + "'.");
                        }

                        // Add returns false when the exact same row text was already seen.
                        if (!seenLines.Add(line))
                        {
                            continue;
                        }

                        examples.Add(ParseExampleLine(exampleIndex, line, dataset));
                        exampleIndex++;
                    }
                }
            }

            if (dataset == null)
            {
                throw new System.IO.InvalidDataException(
                    "ARFF file '" + filePath + "' does not contain a @data section.");
            }

            dataset.SetExamples(examples.ToArray());

            return dataset;
        }