Пример #1
0
        /// <summary>
        /// Builds a data set from a comma-separated text file, one example per non-blank line.
        /// </summary>
        /// <param name="filePath">Path of the CSV file to read.</param>
        /// <param name="hasClassLabel">
        /// When <c>true</c>, the last field of every line is stored as the example's class label
        /// instead of being treated as an attribute.
        /// </param>
        /// <returns>
        /// The data set containing every parsed example, plus the values seen for each attribute index.
        /// </returns>
        /// <exception cref="InvalidDataException">
        /// A line is missing its class label, or its attribute count differs from that of the first line.
        /// </exception>
        public static DataSet ConstructFromCsv(string filePath, bool hasClassLabel)
        {
            var set = new DataSet();

            // ReadAllLines recognises "\n", "\r\n" and "\r" terminators, so files with Windows line
            // endings do not produce stray "\r" pseudo-entries for blank rows.
            var lines = File.ReadAllLines(filePath);

            // -1 means "not determined yet"; 0 cannot be the sentinel because a row may
            // legitimately carry zero attributes (e.g. a label-only row).
            var expectedAttrCount = -1;

            foreach (var line in lines)
            {
                // skip blank and whitespace-only lines
                if (line.Trim().Length == 0)
                {
                    continue;
                }

                // NOTE: empty fields are dropped here (not kept as empty values), so a row with a
                // missing value ends up with fewer fields and is rejected by the count check below.
                var fields = line.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);

                // if we have a class label, the last field is not an attribute
                var attrCount = hasClassLabel ? fields.Length - 1 : fields.Length;

                // a row consisting only of separators has no field to use as the class label
                if (attrCount < 0)
                {
                    throw new InvalidDataException("Invalid CSV entry, missing class label.");
                }

                // make sure that all entries have the same number of attributes
                if (expectedAttrCount < 0)
                {
                    expectedAttrCount = attrCount;
                }
                else if (expectedAttrCount != attrCount)
                {
                    throw new InvalidDataException("Invalid CSV entry, wrong number of attributes.");
                }

                var example = new Example();

                // add all attributes to example
                for (var i = 0; i < attrCount; i++)
                {
                    var value = fields[i].Trim();

                    example.Attributes[i] = value;

                    // first time this column index is seen: register the attribute on the set
                    if (set.Attributes.Count == i)
                    {
                        set.Attributes.Add(new DataAttribute(i));
                    }
                    set.Attributes[i].Values.Add(value);
                }

                // add class label if we have it
                if (hasClassLabel)
                {
                    example.ClassLabel = fields[attrCount].Trim();
                }

                set.Examples.Add(example);
            }

            return set;
        }
Пример #2
0
        /// <summary>
        /// Classifies test data examples using decision tree, and outputs results to stdout.
        /// </summary>
        /// <param name="decisionTree">Decision tree passed to <c>GetClassLabel</c> for every example.</param>
        /// <param name="testData">Data set whose examples are classified and printed, one line each.</param>
        /// <exception cref="Exception">
        /// Thrown when classification or printing fails for any reason; the underlying error is
        /// preserved in <see cref="Exception.InnerException"/>.
        /// </exception>
        public static void Run(TreeNode decisionTree, DataSet testData)
        {
            try
            {
                Console.WriteLine();

                foreach (var example in testData.Examples)
                {
                    var classLabel = GetClassLabel(decisionTree, example);

                    // print the attribute values in ascending key order
                    var attrs = example.Attributes.OrderBy(x => x.Key).Select(x => x.Value);

                    Console.WriteLine("{0}   ==> {1}", string.Join(",", attrs), classLabel);
                }
            }
            catch (Exception ex) // failures here are expected to come from invalid inputs
            {
                // keep the original exception as InnerException so the root cause and its stack
                // trace are not lost when the friendlier message is reported
                throw new Exception("Test data cannot be used with decision tree - one of them is invalid.", ex);
            }
        }
Пример #3
0
 /// <summary>
 /// Builds a decision tree from the given training data by delegating to
 /// <c>LearnInternal</c> with the set's examples and attributes.
 /// </summary>
 /// <param name="trainingData">Data set supplying the examples and attributes to learn from.</param>
 /// <returns>The tree node returned by <c>LearnInternal</c>.</returns>
 public static TreeNode ConstructDecisionTree(DataSet trainingData)
 {
     var examples = trainingData.Examples;
     var attributes = trainingData.Attributes;

     // the top-level call starts with an empty list as the third argument
     var emptyExamples = new List<Example>();

     return LearnInternal(examples, attributes, emptyExamples);
 }