/// <summary>
/// Parses one line of the data section and adds the resulting values to
/// <paramref name="data"/> as a new instance. Progress is logged every time
/// the number of examples reaches a multiple of 1000.
/// </summary>
/// <param name="line">raw text of a single data line</param>
/// <param name="data">instance collection that receives the parsed example</param>
/// <exception cref="FormatException">no values could be parsed from the line</exception>
private static void ParseDataLine(string line, ref Instances data)
{
    List<string> valueList = ParseValueList(line);
    if (null == valueList || 0 == valueList.Count)
    {
        // FormatException derives from Exception, so existing catch blocks keep
        // working; the message now identifies the input that could not be parsed
        throw new FormatException(
            string.Format("Unable to parse any values from data line: '{0}'", line));
    }

    data.AddInstance(valueList);

    if (data.Examples.Length % 1000 == 0)
    {
        // output some progress
        Logger.Log(LogLevel.Progress, " {0}", data.Examples.Length);
    }
}
/// <summary>
/// Reads and parses the data stream in arff format into an instance of class Instances.
/// Blank lines are skipped. Lines before the data section marker are handed to the
/// metadata parser, lines after it to the data parser.
/// </summary>
/// <param name="dataStream">data stream with arff data</param>
/// <param name="classAttributeName">name of the attribute to be used as class</param>
/// <returns>object containing all training data</returns>
/// <exception cref="ArgumentNullException"><paramref name="dataStream"/> is null</exception>
public static Instances Read(StreamReader dataStream, string classAttributeName)
{
    if (null == dataStream)
    {
        throw new ArgumentNullException("dataStream");
    }

    Instances data = new Instances();
    data.AddClassAttribute(classAttributeName);

    // move to the start of the stream - just in case; streams that cannot seek
    // are simply read from their current position
    if (dataStream.BaseStream.CanSeek)
    {
        dataStream.BaseStream.Seek(0, SeekOrigin.Begin);

        // the reader keeps its own buffer; without discarding it the reader would
        // continue from previously buffered text instead of the new position
        dataStream.DiscardBufferedData();
    }

    bool readingData = false;
    while (!dataStream.EndOfStream)
    {
        string line = dataStream.ReadLine();
        if (string.IsNullOrWhiteSpace(line))
        {
            // skip over empty lines
            continue;
        }

        // is this the data header
        if (line.Equals(DataSectionMarker))
        {
            readingData = true;
            continue;
        }

        // process the line
        if (readingData)
        {
            ParseDataLine(line, ref data);
        }
        else
        {
            ParseMetadataLine(line, ref data);
        }
    }

    return data;
}
/// <summary>
/// Learns a decision tree model for the data passed as arguments.
/// The class attribute is removed from the list of candidate attributes; when
/// unknown values are handled as regular values, every remaining attribute is
/// copied with <c>Instances.UnknownValue</c> appended to its value list.
/// </summary>
/// <param name="data">training data</param>
/// <returns>decision tree</returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null</exception>
/// <exception cref="ArgumentException">
/// <paramref name="data"/> has no examples, attributes or class attribute set
/// </exception>
public Node Learn(Instances data)
{
    if (null == data)
    {
        // ArgumentNullException derives from ArgumentException, so callers that
        // catch ArgumentException are unaffected
        throw new ArgumentNullException("data");
    }

    if (null == data.Examples || null == data.Attributes || null == data.ClassAttribute)
    {
        // the single-string constructor treats its argument as the message, so the
        // parameter name is passed explicitly as the second argument
        throw new ArgumentException(
            "Training data must provide examples, attributes and a class attribute.",
            "data");
    }

    Logger.Log(LogLevel.Progress, "Running ID3Lerner ");

    // adjust the attribute list: the class attribute is not a split candidate
    var attributeList = data.Attributes.Where(a => !a.Name.Equals(data.ClassAttribute.Name));
    if (this.handleUnknownAsValue)
    {
        attributeList = attributeList.Select(a =>
        {
            // build a copy so the attribute definitions in data stay untouched
            List<string> values = a.Values.ToList();
            values.Add(Instances.UnknownValue);
            return new Arff.Attribute()
            {
                Name = a.Name,
                Values = values.ToArray()
            };
        });
    }

    Node dt = Learn(
        data.Examples,
        attributeList.ToArray(),
        data.ClassAttribute);

    Logger.Log(LogLevel.Progress, " Done{0}", System.Console.Out.NewLine);

    return dt;
}
/// <summary>
/// Computes the accuracy of the classifier on the data set passed as a parameter.
/// Each example is classified and the prediction is compared with the value stored
/// under the class attribute name.
/// </summary>
/// <param name="data">test data to evaluate classifier on</param>
/// <returns>
/// number between 0.0 and 1.0 representing the accuracy; 0.0 when the data set
/// contains no examples
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="data"/>, its class attribute or its examples are null
/// </exception>
public double Evaluate(Instances data)
{
    if (null == data || null == data.ClassAttribute || null == data.Examples)
    {
        throw new ArgumentNullException("data");
    }

    Logger.Log(LogLevel.Progress, "Running Accuracy Evaluator ");

    string classAttributeName = data.ClassAttribute.Name;
    int correct = 0;
    foreach (Instance example in data.Examples)
    {
        string predictedClass = this.classifier.Classify(example);
        string actualClass = example.Data[classAttributeName];
        if (predictedClass == actualClass)
        {
            correct++;
        }
    }

    Logger.Log(LogLevel.Progress, " Done{0}", System.Console.Out.NewLine);

    int total = data.Examples.Length;
    if (0 == total)
    {
        // 0.0 / 0.0 would yield NaN, which is outside the documented range
        return 0.0;
    }

    return (double)correct / (double)total;
}
/// <summary>
/// Learns an ID3 decision tree from <paramref name="data"/>, writes the tree to a
/// text file named after the training data file and the learner settings, then
/// measures the tree's accuracy on <paramref name="testData"/> and appends a
/// tab-separated result line to the report.
/// </summary>
/// <param name="data">training data used to learn the tree</param>
/// <param name="testData">data the learned tree is evaluated on</param>
/// <param name="splitStoppingConfidence">confidence value passed to the learner</param>
/// <param name="useGainRatio">gain ratio flag passed to the learner</param>
private void TrainAndEvaluateClassifier(Instances data, Instances testData, double splitStoppingConfidence, bool useGainRatio)
{
    // learn the tree
    Node decisionTree = new ID3Learner(splitStoppingConfidence, true, useGainRatio).Learn(data);

    // output the tree
    string treeFileName = string.Format(
        "{0}_DTID3_{1}_{2}.txt",
        Path.GetFileNameWithoutExtension(this.trainingDataFilePath),
        useGainRatio,
        splitStoppingConfidence.ToString("0.0000"));
    File.WriteAllText(treeFileName, decisionTree.ToString());

    // evaluate the classifier
    AccuracyEvaluator evaluator = new AccuracyEvaluator(new DTClassifier(decisionTree));
    double accuracy = evaluator.Evaluate(testData);

    // record one report row: confidence, gain ratio flag, accuracy
    string reportLine = string.Format(
        "{0}\t\t\t{1}\t\t{2}{3}",
        splitStoppingConfidence.ToString("0.0000"),
        useGainRatio,
        accuracy.ToString("0.0000"),
        System.Console.Out.NewLine);
    this.reportData.Add(reportLine);
}
/// <summary>
/// Parses one line from the header section. Lines that do not match the attribute
/// pattern are ignored; for matching lines the attribute name and its value list
/// are extracted and registered with <paramref name="data"/>.
/// </summary>
/// <param name="line">raw text of a single header line</param>
/// <param name="data">instance collection that receives the attribute definition</param>
/// <exception cref="FormatException">
/// the line matches the attribute pattern but no values could be parsed from it
/// </exception>
private static void ParseMetadataLine(string line, ref Instances data)
{
    // the only metadata lines we care about are attributes
    Match attributeMatch = AttributeLineRegex.Match(line);
    if (!attributeMatch.Success)
    {
        return;
    }

    // retrieve the data
    string attributeName = attributeMatch.Groups["Name"].Value;
    string attributeValueList = attributeMatch.Groups["Values"].Value;

    // lets parse the value list
    List<string> valueList = ParseValueList(attributeValueList);
    if (null == valueList || 0 == valueList.Count)
    {
        // FormatException derives from Exception, so existing catch blocks keep
        // working; the message now names the attribute that could not be parsed
        throw new FormatException(
            string.Format(
                "Attribute '{0}' does not declare any parsable values: '{1}'",
                attributeName,
                line));
    }

    // add the attribute definition
    data.AddAttribute(attributeName, valueList);
}