Ejemplo n.º 1
0
        /// <summary>
        /// Parses one comma-separated data line and appends it to <paramref name="relation"/> as a new row.
        /// </summary>
        /// <remarks>
        /// Each kept field is converted to a double: "?" becomes <see cref="double.MaxValue"/>,
        /// fields of real columns are parsed as numbers using the invariant culture, and any other
        /// field is replaced by the value its column's NominalValues returns for that label.
        /// Surrounding whitespace of every field is ignored.
        /// </remarks>
        /// <param name="relation">Relation whose columns describe the fields and which receives the row.</param>
        /// <param name="row">Raw data line; fields are separated by commas.</param>
        /// <param name="ignoredCols">Zero-based indices of fields in <paramref name="row"/> to skip.</param>
        private static void AddRelationRow(ArffRelation relation, string row, List<int> ignoredCols)
        {
            string[] fields = row.Split(',');
            var parsed = new List<double>(fields.Length);

            // Index into relation.Columns; it only advances for fields that are kept,
            // because ignored fields have no corresponding column.
            int columnIndex = 0;

            for (int fieldIndex = 0; fieldIndex < fields.Length; fieldIndex++)
            {
                if (ignoredCols.Contains(fieldIndex))
                {
                    continue;
                }

                // Whitespace after the separating comma would otherwise defeat the
                // "?" check and the nominal label lookup.
                string field = fields[fieldIndex].Trim();

                if (field == "?")
                {
                    // Sentinel for a missing value.
                    parsed.Add(double.MaxValue);
                }
                else if (relation.Columns[columnIndex].IsReal)
                {
                    // The data uses '.' as decimal separator regardless of the machine's
                    // regional settings, so never parse with the current culture.
                    parsed.Add(double.Parse(field, System.Globalization.CultureInfo.InvariantCulture));
                }
                else
                {
                    parsed.Add(relation.Columns[columnIndex].NominalValues[field]);
                }

                columnIndex++;
            }

            relation.AddRow(new ArffRow(relation, parsed));
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Parses an <c>@attribute</c> declaration line and adds the resulting column to
        /// <paramref name="relation"/>.
        /// </summary>
        /// <remarks>
        /// The line is read as three parts separated by spaces or tabs: the keyword, the attribute
        /// name, and the type specification (the remainder of the line). The type names "real",
        /// "continuous", "numeric" and "integer" (compared case-insensitively) produce a real-valued
        /// column. Any other type specification is treated as a comma-separated nominal list,
        /// optionally wrapped in braces, whose labels are numbered 0, 1, 2, ... in declaration order.
        /// The name ends at the first space or tab, so names containing whitespace are not supported.
        /// A line with no space or tab after the keyword or after the name makes
        /// <c>Substring</c> throw <see cref="System.ArgumentOutOfRangeException"/>.
        /// </remarks>
        /// <param name="relation">Relation that receives the new column.</param>
        /// <param name="attribute">The full declaration line, starting with the keyword.</param>
        private static void AddRelationAttribute(ArffRelation relation, string attribute)
        {
            const System.StringComparison ignoreCase = System.StringComparison.OrdinalIgnoreCase;
            char[] separators = { ' ', '\t' };

            // Peel the keyword off the front, then the name; whatever is left is the type.
            string keyword  = attribute.Substring(0, attribute.IndexOfAny(separators));
            string rest     = attribute.Remove(0, keyword.Length).Trim();
            string name     = rest.Substring(0, rest.IndexOfAny(separators));
            string typeSpec = rest.Remove(0, name.Length).Trim();

            // Ordinal comparison keeps the match independent of the current culture
            // (ToLower() maps 'I' differently under e.g. Turkish settings).
            // "numeric" and "integer" are the other numeric type names of the ARFF format.
            bool isReal = typeSpec.Equals("real", ignoreCase)
                       || typeSpec.Equals("continuous", ignoreCase)
                       || typeSpec.Equals("numeric", ignoreCase)
                       || typeSpec.Equals("integer", ignoreCase);

            Map<double, string> nominals = null;

            if (!isReal)
            {
                nominals = new Map<double, string>();

                string[] labels = typeSpec.Trim(new char[] { '{', '}' }).Split(',');
                for (int code = 0; code < labels.Length; code++)
                {
                    // The position in the declaration is the numeric code of the label.
                    nominals.Add(code, labels[code].Trim());
                }
            }

            relation.AddColumn(new ArffColumn(name, isReal, nominals));
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Reads ARFF text from <paramref name="fs"/> and builds the relation it describes.
        /// </summary>
        /// <remarks>
        /// The header is scanned line by line for <c>@relation</c>, <c>@attribute</c> and
        /// <c>@data</c> (matched case-insensitively at the start of the line). Every line after
        /// <c>@data</c> is handed to <c>AddRelationRow</c>, except blank lines and lines
        /// starting with '%'. The relation is passed to the helper methods exactly as it is at
        /// that point, so an <c>@attribute</c> or data line that appears before any
        /// <c>@relation</c> line is processed with a null relation.
        /// </remarks>
        /// <param name="fs">Stream to read from. It is read to the end but not closed by this method.</param>
        /// <param name="columnsToIgnore">
        /// Zero-based positions of <c>@attribute</c> declarations (in file order) that must not
        /// become columns; the same list is forwarded to the row parser.
        /// </param>
        /// <returns>
        /// The relation created from the <c>@relation</c> line, or <c>null</c> if the stream
        /// contained no such line.
        /// </returns>
        public static ArffRelation LoadArff(FileStream fs, List<int> columnsToIgnore)
        {
            // Ordinal matching keeps keyword detection independent of the current culture;
            // ToLower() under e.g. Turkish settings would not recognise "@RELATION".
            const System.StringComparison ignoreCase = System.StringComparison.OrdinalIgnoreCase;

            // Deliberately not disposed: disposing the reader would close the caller's stream.
            var reader = new StreamReader(fs);

            ArffRelation relation       = null;
            int          attributeIndex = 0;
            bool         inDataSection  = false;

            // Header: relation name and attribute declarations, up to the @data marker.
            while (!inDataSection && !reader.EndOfStream)
            {
                string line = reader.ReadLine();

                if (line.StartsWith("@relation", ignoreCase))
                {
                    relation = LoadRelation(line);
                }
                else if (line.StartsWith("@attribute", ignoreCase))
                {
                    if (!columnsToIgnore.Contains(attributeIndex))
                    {
                        AddRelationAttribute(relation, line);
                    }

                    // Ignored declarations still count, so indices refer to positions in the file.
                    attributeIndex++;
                }
                else if (line.StartsWith("@data", ignoreCase))
                {
                    inDataSection = true;
                }
            }

            // Data section: one row per line.
            while (!reader.EndOfStream)
            {
                string line = reader.ReadLine();

                // Blank lines (typically a trailing newline at the end of the file) carry no
                // row and would make the row parser fail; '%' starts a comment line.
                if (line.Trim().Length == 0 || line.StartsWith("%", System.StringComparison.Ordinal))
                {
                    continue;
                }

                AddRelationRow(relation, line, columnsToIgnore);
            }

            return relation;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Builds a clusterer for <paramref name="relation"/> and re-clusters it until the total
        /// squared error it reports is the same on two consecutive passes, printing the state
        /// after every pass.
        /// </summary>
        /// <param name="relation">Data set handed to the clusterer.</param>
        /// <param name="k">Passed to the clusterer constructor as its first argument.</param>
        /// <param name="randomInitials">Passed to the clusterer constructor as its third argument.</param>
        private static void RunClusteringAlgorithm(ArffParser.ArffRelation relation, int k, bool randomInitials)
        {
            var clusterer = new Clusterer.KClusterer(k, relation, randomInitials);

            double previousError = clusterer.TotalSquaredError();
            int    pass          = 1;

            PrintMessage(clusterer, pass);

            bool converged;
            do
            {
                clusterer.ReCluster();
                double currentError = clusterer.TotalSquaredError();

                pass++;
                PrintMessage(clusterer, pass);

                // Equals() rather than ==/!=: it treats NaN as equal to NaN, so the loop also
                // stops if the error degenerates to NaN instead of spinning forever
                // (NaN != NaN is always true).
                converged     = currentError.Equals(previousError);
                previousError = currentError;
            }
            while (!converged);
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Initializes the row with the given relation and value list.
 /// </summary>
 /// <param name="relation">Relation reference kept by the row.</param>
 /// <param name="values">Row values; the list instance itself is stored, it is not copied.</param>
 public ArffRow(ArffRelation relation, List<double> values)
 {
     this._values   = values;
     this._relation = relation;
 }