Пример #1
0
        /// <summary>
        /// Parses a single line of the data section and adds the parsed values
        /// to <paramref name="data"/> as a new instance.
        /// </summary>
        /// <param name="line">raw text of one data line</param>
        /// <param name="data">instance collection the parsed values are added to</param>
        /// <exception cref="FormatException">no values could be parsed from the line</exception>
        private static void ParseDataLine(string line, ref Instances data)
        {
            List<string> valueList = ParseValueList(line);
            if (null == valueList
                || 0 == valueList.Count)
            {
                // FormatException still derives from Exception, so callers that
                // catch Exception keep working, but the failure is now diagnosable.
                throw new FormatException(
                    string.Format("Unable to parse any values from data line: '{0}'", line));
            }

            data.AddInstance(valueList);

            // output some progress every 1000 examples
            if (data.Examples.Length % 1000 == 0)
            {
                Logger.Log(LogLevel.Progress, " {0}", data.Examples.Length);
            }
        }
Пример #2
0
        /// <summary>
        /// Reads and parses the data stream in arff format into an instance of class Instances.
        /// </summary>
        /// <remarks>
        /// Lines before the data section marker are handed to ParseMetadataLine, lines after it
        /// to ParseDataLine. Empty and whitespace-only lines are skipped. If the underlying
        /// stream is seekable it is rewound to its start before reading; otherwise reading
        /// begins at the current position.
        /// </remarks>
        /// <param name="dataStream">data stream with arff data</param>
        /// <param name="classAttributeName">name of the attribute to be used as class</param>
        /// <returns>object containing all training data</returns>
        /// <exception cref="ArgumentNullException"><paramref name="dataStream"/> is null</exception>
        public static Instances Read(StreamReader dataStream, string classAttributeName)
        {
            if (null == dataStream)
            {
                throw new ArgumentNullException("dataStream");
            }

            Instances data = new Instances();
            data.AddClassAttribute(classAttributeName);

            // move to the start of the stream - just in case
            if (dataStream.BaseStream.CanSeek)
            {
                dataStream.BaseStream.Seek(0, SeekOrigin.Begin);

                // the reader keeps its own buffer; without discarding it, data buffered
                // before the seek would be returned ahead of the rewound stream content
                dataStream.DiscardBufferedData();
            }

            bool readingData = false;
            string line;
            while ((line = dataStream.ReadLine()) != null)
            {
                if (string.IsNullOrWhiteSpace(line))
                {
                    // skip over empty lines
                    continue;
                }

                // is this the data header
                if (line.Equals(DataSectionMarker))
                {
                    readingData = true;
                    continue;
                }

                // process the line
                if (!readingData)
                {
                    ParseMetadataLine(line, ref data);
                }
                else
                {
                    ParseDataLine(line, ref data);
                }
            }

            return data;
        }
Пример #3
0
        /// <summary>
        /// Learns a decision tree model for the data passed as arguments
        /// </summary>
        /// <remarks>
        /// The class attribute is removed from the list of attributes handed to the learner.
        /// When handleUnknownAsValue is set, every remaining attribute is copied with
        /// Instances.UnknownValue appended to its list of values.
        /// </remarks>
        /// <param name="data">training data</param>
        /// <returns>decision tree</returns>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null</exception>
        /// <exception cref="ArgumentException">
        /// Examples, Attributes or ClassAttribute of <paramref name="data"/> is null
        /// </exception>
        public Node Learn(Instances data)
        {
            if (null == data)
            {
                // ArgumentNullException derives from ArgumentException, so callers
                // catching ArgumentException are unaffected
                throw new ArgumentNullException("data");
            }

            if (null == data.Examples
                || null == data.Attributes
                || null == data.ClassAttribute)
            {
                // pass "data" as the parameter name rather than as the message
                throw new ArgumentException(
                    "Examples, Attributes and ClassAttribute must all be set.",
                    "data");
            }

            Logger.Log(LogLevel.Progress, "Running ID3Lerner ");

            // adjust the attribute list: the class attribute is not a split candidate
            var attributeList = data.Attributes.Where(a => !a.Name.Equals(data.ClassAttribute.Name));
            if (this.handleUnknownAsValue)
            {
                // build copies so the attribute definitions held by data are not modified
                attributeList = attributeList.Select(a =>
                    {
                        List<string> values = a.Values.ToList();
                        values.Add(Instances.UnknownValue);

                        return new Arff.Attribute()
                        {
                            Name = a.Name,
                            Values = values.ToArray()
                        };
                    });
            }

            Node dt = Learn(
                data.Examples,
                attributeList.ToArray(),
                data.ClassAttribute);

            Logger.Log(LogLevel.Progress, " Done{0}", System.Console.Out.NewLine);
            return dt;
        }
Пример #4
0
        /// <summary>
        /// Computes the accuracy of the classifier on the data set passed as a parameter.
        /// </summary>
        /// <remarks>
        /// Accuracy is the number of examples whose predicted class equals the value stored
        /// under the class attribute name, divided by the total number of examples.
        /// An empty example set yields 0.0 instead of NaN.
        /// </remarks>
        /// <param name="data">test data to evaluate classifier on</param>
        /// <returns>number between 0.0 and 1.0 representing the accuracy</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="data"/>, its ClassAttribute or its Examples is null
        /// </exception>
        public double Evaluate(Instances data)
        {
            if (null == data
                || null == data.ClassAttribute
                || null == data.Examples)
            {
                throw new ArgumentNullException("data");
            }

            Logger.Log(LogLevel.Progress, "Running Accuracy Evaluator ");

            // the class attribute name does not change while iterating
            var classAttributeName = data.ClassAttribute.Name;

            int correct = 0;
            foreach (Instance example in data.Examples)
            {
                string predictedClass = this.classifier.Classify(example);
                string actualClass = example.Data[classAttributeName];
                if (predictedClass == actualClass)
                {
                    correct++;
                }
            }

            Logger.Log(LogLevel.Progress, " Done{0}", System.Console.Out.NewLine);

            int total = data.Examples.Length;
            if (0 == total)
            {
                // 0.0 / 0.0 would be NaN, which is outside the documented range
                return 0.0;
            }

            return (double)correct / (double)total;
        }
Пример #5
0
        /// <summary>
        /// Trains an ID3Learner on the training data, writes the resulting tree to a text file,
        /// measures its accuracy on the test data and appends one row to the report.
        /// </summary>
        /// <param name="data">data the tree is learned from</param>
        /// <param name="testData">data the accuracy is measured on</param>
        /// <param name="splitStoppingConfidence">confidence value passed to the learner and recorded in the output</param>
        /// <param name="useGainRatio">flag passed to the learner and recorded in the output</param>
        private void TrainAndEvaluateClassifier(Instances data, Instances testData, double splitStoppingConfidence, bool useGainRatio)
        {
            // build the tree from the training set
            Node tree = new ID3Learner(splitStoppingConfidence, true, useGainRatio).Learn(data);

            // dump the tree to a file named after the training file and the run settings
            string treeFileName = string.Format(
                "{0}_DTID3_{1}_{2}.txt",
                Path.GetFileNameWithoutExtension(this.trainingDataFilePath),
                useGainRatio,
                splitStoppingConfidence.ToString("0.0000"));
            File.WriteAllText(treeFileName, tree.ToString());

            // score the tree against the test set
            AccuracyEvaluator accuracyEvaluator = new AccuracyEvaluator(new DTClassifier(tree));
            double measuredAccuracy = accuracyEvaluator.Evaluate(testData);

            // record one report row: confidence, gain-ratio flag, accuracy
            string reportRow = string.Format(
                "{0}\t\t\t{1}\t\t{2}{3}",
                splitStoppingConfidence.ToString("0.0000"),
                useGainRatio,
                measuredAccuracy.ToString("0.0000"),
                System.Console.Out.NewLine);
            this.reportData.Add(reportRow);
        }
Пример #6
0
        /// <summary>
        /// Parses a single metadata line. Lines that do not match AttributeLineRegex are
        /// ignored; for matching lines the attribute name and its value list are added to
        /// <paramref name="data"/>.
        /// </summary>
        /// <param name="line">raw text of one metadata line</param>
        /// <param name="data">instance collection the attribute definition is added to</param>
        /// <exception cref="FormatException">the attribute line yields no values</exception>
        private static void ParseMetadataLine(string line, ref Instances data)
        {
            // the only metadata lines we care about are attributes
            Match attributeMatch = AttributeLineRegex.Match(line);
            if (!attributeMatch.Success)
            {
                return;
            }

            // retrieve the data
            string attributeName = attributeMatch.Groups["Name"].Value;
            string attributeValueList = attributeMatch.Groups["Values"].Value;

            // lets parse the value list
            List<string> valueList = ParseValueList(attributeValueList);
            if (null == valueList
                || 0 == valueList.Count)
            {
                // FormatException still derives from Exception, so callers that
                // catch Exception keep working, but the failure is now diagnosable.
                throw new FormatException(
                    string.Format(
                        "Unable to parse any values for attribute '{0}' from line: '{1}'",
                        attributeName,
                        line));
            }

            // add the attribute definition
            data.AddAttribute(attributeName, valueList);
        }