/// <summary>
/// Loads an arff file, storing its header in <c>Header</c> and all of its instances in <c>Instances</c>.
/// Both members are reset to <c>null</c> before the read is attempted.
/// </summary>
/// <param name="arff">Arff file name</param>
/// <exception cref="Exception">
/// Thrown when the file does not exist or cannot be read. The original exception is kept as the
/// inner exception; when <c>Verbose</c> is set its stack trace is appended to the message.
/// </exception>
private void ReadArff(string arff)
{
    Header = null;
    Instances = null;

    try
    {
        // Raised inside the try block on purpose so that it is wrapped like any other read failure.
        if (!File.Exists(arff))
        {
            throw new FileNotFoundException(string.Format("File \"{0}\" does not exist.", arff), arff);
        }

        using (ArffReader arffReader = new ArffReader(arff))
        {
            Header = arffReader.ReadHeader();
            Instances = arffReader.ReadAllInstances();
        }
    }
    catch (Exception e)
    {
        string message = Verbose
            ? string.Format("Arff file could not be read: {0}\n{1}", e.Message, e.StackTrace)
            : string.Format("Arff file could not be read: {0}", e.Message);

        throw new Exception(message, e);
    }
}
/// <summary>
/// Reads the header of an ARFF text whose declarations are separated by bare carriage returns.
/// No explicit assertion is made; the header read itself is the check.
/// </summary>
public void CRLineEndings()
{
    string arff = "@relation relationName\r@attribute a1 numeric\r@data";

    ArffReader reader = CreateArffReader(arff);

    reader.ReadHeader();
}
/// <summary>
/// Reads the header of an ARFF text that declares a relation and a data section but no attributes.
/// The expected outcome is not asserted in the method body.
/// </summary>
public void MissingAttributes()
{
    string arff = @"@relation relationName @data";

    ArffReader reader = CreateArffReader(arff);

    reader.ReadHeader();
}
/// <summary>
/// Reads the header of an ARFF text that has an attribute and a data section but no relation declaration.
/// The expected outcome is not asserted in the method body.
/// </summary>
public void MissingRelationName()
{
    string arff = @"@attribute a1 numeric @data";

    ArffReader reader = CreateArffReader(arff);

    reader.ReadHeader();
}
/// <summary>
/// Requests an instance from a freshly created reader without reading the header first.
/// The expected outcome is not asserted in the method body.
/// </summary>
public void ReadInstanceBeforeHeader()
{
    string arff = @"@relation relationName @attribute a1 numeric @data";

    ArffReader reader = CreateArffReader(arff);

    // Deliberately no ReadHeader() call before this.
    reader.ReadInstance();
}
/// <summary>
/// Reads the header of an ARFF text whose keywords and type name are written in mixed case.
/// No explicit assertion is made; the header read itself is the check.
/// </summary>
public void KeywordCaseInsensitivity()
{
    string arff = @"@RElatION relationName @ATTRiBUTE a1 nuMERic @DaTa";

    ArffReader reader = CreateArffReader(arff);

    reader.ReadHeader();
}
/// <summary>
/// Reads the header of an ARFF text that declares a relation and an attribute but has no data section.
/// The expected outcome is not asserted in the method body.
/// </summary>
public void MissingData()
{
    string arff = @"@relation relationName @attribute a1 numeric ";

    ArffReader reader = CreateArffReader(arff);

    reader.ReadHeader();
}
/// <summary>
/// Verifies that reading instances from an ARFF text with an empty data section yields <c>null</c>,
/// and keeps yielding <c>null</c> when asked again.
/// </summary>
public void NoInstances()
{
    string arff = @"@relation relationName @attribute a1 numeric @data";

    ArffReader reader = CreateArffReader(arff);
    reader.ReadHeader();

    // Ask twice: the second call checks that hitting the end is repeatable.
    for (int attempt = 0; attempt < 2; attempt++)
    {
        Assert.IsNull(reader.ReadInstance());
    }
}
/// <summary>
/// Reads five instances (dense and sparse, with and without a trailing <c>{weight}</c>) and checks
/// the instance weight reported for each of them.
/// </summary>
public void InstanceWeightsReadCorrectly()
{
    // NOTE(review): the five instances below sit on a single line separated by spaces, while ARFF
    // normally expects one instance per line - confirm the literal was not flattened by accident.
    string arff = @"@relation relationName @attribute a1 numeric @attribute a2 integer @attribute a3 real @data -6.54,42,0.0 -6.54,42,0.0,{5} -6.54,42,0.0,{0.476} {0 -6.54,1 42} {0 -6.54,1 42},{0.476}";

    // Expected weight per instance, in reading order; null means "no weight given".
    double?[] expectedWeights = { null, 5.0, 0.476, null, 0.476 };

    ArffReader reader = CreateArffReader(arff);
    reader.ReadHeader();

    foreach (double? expectedWeight in expectedWeights)
    {
        reader.ReadInstance(out double? actualWeight);

        if (expectedWeight == null)
        {
            Assert.IsNull(actualWeight);
        }
        else
        {
            Assert.AreEqual(expectedWeight.Value, actualWeight);
        }
    }
}
/// <summary>
/// Parses <paramref name="arff"/> and compares the result with the supplied expectations.
/// Any expectation left as <c>null</c> is skipped, but the header and all instances are always read.
/// </summary>
/// <param name="arff">ARFF text to parse.</param>
/// <param name="expectedRelationName">Relation name the header must carry, or <c>null</c> to skip the check.</param>
/// <param name="expectedAttributes">Attributes the header must contain, in order, or <c>null</c> to skip the check.</param>
/// <param name="expectedInstances">Instances the data section must contain, or <c>null</c> to skip the check.</param>
private void AssertReader(string arff, string expectedRelationName = null, ICollection expectedAttributes = null, object[][] expectedInstances = null)
{
    var reader = CreateArffReader(arff);
    var parsedHeader = reader.ReadHeader();

    if (expectedRelationName != null)
    {
        Assert.AreEqual(expectedRelationName, parsedHeader.RelationName, "Unexpected relation name.");
    }

    if (expectedAttributes != null)
    {
        CollectionAssert.AreEqual(expectedAttributes, parsedHeader.Attributes, "Unexpected attributes.");
    }

    // Instances are read only after the header checks have passed, and even when nothing is expected.
    var parsedInstances = reader.ReadAllInstances();

    if (expectedInstances != null)
    {
        CollectionAssert.AreEqual(expectedInstances, parsedInstances, new InstanceComparer(), "Unexpected instances.");
    }
}
/// <summary>
/// Trains one ID3 classifier per confidence level on the training ARFF file, prints node counts and
/// train/test accuracies to the console, and writes the rules of the trees built with a confidence
/// above 0.5 to text files in the output folder.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    ArffHeader header;

    // Training
    List<int[]> trainingData = LoadAsIntRows(_trainingArffFile, out header);

    // Test (from here on "header" is the header of the test file)
    List<int[]> testData = LoadAsIntRows(_testArffFile, out header);

    double[] confidences = { 0.0, 0.1, 0.2, 0.4, 0.6, 0.8, 0.9, 0.95, 0.99, 0.9999 };

    PrintAsCsv(header, trainingData, _pathToOutputCsv);

    Parallel.ForEach(confidences, confidence =>
    {
        Id3Classifier classifier = new Id3Classifier(trainingData, _classIndex, confidence);
        Console.WriteLine($"Confidence {confidence}: Num of nodes {classifier.Tree.Count}");

        // Test accuracy on training
        int trainHits = trainingData.Count(row => classifier.GetClass(row) == row[_classIndex]);
        Console.WriteLine($"Confidence {confidence}: Accuracy on train = {trainHits / (double)trainingData.Count}");

        // Test accuracy on test
        int testHits = testData.Count(row => classifier.GetClass(row) == row[_classIndex]);
        Console.WriteLine($"Confidence {confidence}: Accuracy on test = {testHits / (double)testData.Count}");

        // Only print small trees.
        if (confidence <= 0.5)
        {
            return;
        }

        StringBuilder rules = new StringBuilder();
        StringBuilder bestPositiveRule = new StringBuilder();
        StringBuilder bestNegativeRule = new StringBuilder();
        int bestPositive = int.MinValue;
        int bestNegative = int.MinValue;

        PrintTreeAsRules(rules, ref bestPositiveRule, ref bestNegativeRule, ref bestPositive, ref bestNegative, classifier.Tree, header);

        rules.AppendLine("The most max positive rule is:");
        rules.AppendLine(bestPositiveRule.ToString());
        rules.AppendLine();
        rules.AppendLine("The most max negative rule is:");
        rules.AppendLine(bestNegativeRule.ToString());

        Directory.CreateDirectory(_outputFolder);
        string treeFile = Path.Combine(_outputFolder, $"Tree{confidence}.txt");
        File.WriteAllText(treeFile, rules.ToString());
    });

    Console.WriteLine("Press ENTER to exit...");
    Console.ReadLine();
}

/// <summary>
/// Reads every instance of an ARFF file and converts each value to <c>int</c>, mapping
/// <c>null</c> values to -1.
/// </summary>
/// <param name="arffFile">Path of the ARFF file to read.</param>
/// <param name="header">Receives the header of the file.</param>
/// <returns>One <c>int[]</c> per instance, in file order.</returns>
private static List<int[]> LoadAsIntRows(string arffFile, out ArffHeader header)
{
    List<object[]> rawRows = new List<object[]>();

    using (ArffReader reader = new ArffReader(arffFile))
    {
        header = reader.ReadHeader();

        for (object[] raw = reader.ReadInstance(); raw != null; raw = reader.ReadInstance())
        {
            rawRows.Add(raw);
        }
    }

    // Conversion happens only after the whole file has been read and the reader closed.
    List<int[]> rows = new List<int[]>();
    foreach (object[] raw in rawRows)
    {
        rows.Add(Array.ConvertAll(raw, value => value == null ? -1 : (int)value));
    }

    return rows;
}
/// <summary>
/// Merges several ARFF files into one file named after the text of <c>textBox3</c> plus ".arff".
/// From every input file the last instance is taken and reduced to the attributes whose names are
/// listed (one per line) in "attributes.txt"; the output columns follow the order of that list.
/// </summary>
/// <remarks>
/// The relation name and attribute declarations are written once, from the first input file that
/// has at least one listed attribute. NOTE(review): later files are assumed to expose the same
/// listed attributes; their rows are not validated against the written header - confirm with callers.
/// Input files without a header or without any instance contribute no row.
/// </remarks>
/// <param name="allfiles">Paths of the ARFF files to merge.</param>
private void WriteArffFile(string[] allfiles)
{
    using (ArffWriter arffWriter = new ArffWriter(textBox3.Text + ".arff"))
    {
        string[] wantedNames = null;      // loaded lazily, only once a file with a header is seen
        bool headerWritten = false;
        object[][] rows = new object[allfiles.Length][];
        int rowCount = 0;

        foreach (string file in allfiles)
        {
            ArffHeader header;
            object[] lastInstance = null;

            // Header and instances are fully materialized here, so the reader can be released
            // immediately (and is released even if reading throws).
            using (ArffReader datafile = new ArffReader(file))
            {
                header = datafile.ReadHeader();
                foreach (object[] candidate in datafile.ReadAllInstances())
                {
                    lastInstance = candidate;
                }
            }

            if (header != null)
            {
                if (wantedNames == null)
                {
                    wantedNames = File.ReadAllLines("attributes.txt");
                }

                var attributes = header.Attributes;

                // Pass 1: count the selected attributes and, for the first file that has any,
                // emit the header of the merged file.
                int selectedCount = 0;
                foreach (string wanted in wantedNames)
                {
                    foreach (var attribute in attributes)
                    {
                        if (attribute.Name != wanted)
                        {
                            continue;
                        }

                        if (!headerWritten)
                        {
                            if (selectedCount == 0)
                            {
                                arffWriter.WriteRelationName(header.RelationName);
                            }

                            arffWriter.WriteAttribute(new ArffAttribute(attribute.Name, attribute.Type));
                        }

                        selectedCount++;
                    }
                }

                if (selectedCount > 0)
                {
                    headerWritten = true;
                }

                // Pass 2: copy the selected values of the last instance, in the same order.
                if (lastInstance != null)
                {
                    object[] row = new object[selectedCount];
                    int target = 0;

                    foreach (string wanted in wantedNames)
                    {
                        // Position of the attribute inside the source instance; restarts for every
                        // wanted name because the attribute list is scanned from the beginning.
                        int sourceIndex = 0;
                        foreach (var attribute in attributes)
                        {
                            if (attribute.Name == wanted)
                            {
                                row[target] = lastInstance[sourceIndex];
                                target++;
                            }

                            sourceIndex++;
                        }
                    }

                    rows[rowCount] = row;
                    rowCount++;
                }
            }

            progress++;
        }

        // Instances may only follow the complete header, so they are written last.
        for (int r = 0; r < rowCount; r++)
        {
            arffWriter.WriteInstance(rows[r]);
        }
    }
}
/// <summary>
/// Repeats a bagging experiment 100 times: for several ensemble sizes, trains that many ID3
/// classifiers on samples of the training data and prints, per ensemble size, the accuracy on the
/// training and on the test data as CSV lines.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    ArffHeader header;

    // Training
    List<int[]> trainingData = ReadIntInstances(TrainingArffFile, out header);

    // Test
    List<int[]> testData = ReadIntInstances(TestArffFile, out header);

    Console.WriteLine("Number of Samples, training accuracy, test accuracy");

    // Ensemble sizes to evaluate; both result maps are built from this single list.
    int[] sampleCounts = { 1, 3, 5, 10, 20, 25, 50, 75, 100 };

    // Do this exercise multiple times as sampling is random.
    Parallel.For(0, 100, (k) =>
    {
        // Per-run results, keyed by number of samples and pre-filled with 0.
        var trainingAccuracies = new ConcurrentDictionary<int, double>(sampleCounts.ToDictionary(count => count, count => 0.0));
        var testAccuracies = new ConcurrentDictionary<int, double>(sampleCounts.ToDictionary(count => count, count => 0.0));

        // Calculate different sample accuracies in parallel.
        Parallel.ForEach(trainingAccuracies.Keys, numOfSamples =>
        {
            Sampler sampler = new Sampler(trainingData, numOfSamples);

            List<Id3Classifier> classifiers = Enumerable.Range(0, numOfSamples)
                .Select(i => new Id3Classifier(sampler.Samples[i], ClassIndex, Confidence))
                .ToList();

            // Evaluate training and test to look out for overfitting.
            trainingAccuracies[numOfSamples] = Evaluate(trainingData, classifiers);
            testAccuracies[numOfSamples] = Evaluate(testData, classifiers);
        });

        // Keep the lines of one run together on the console.
        lock (_lockConsole)
        {
            foreach (int numOfSamples in testAccuracies.Keys.OrderBy(n => n))
            {
                Console.WriteLine($"{numOfSamples},{trainingAccuracies[numOfSamples]},{testAccuracies[numOfSamples]}");
            }
        }
    });

    Console.WriteLine("Press ENTER to exit...");
    Console.ReadLine();
}

/// <summary>
/// Reads every instance of an ARFF file and converts each value to <c>int</c>, mapping
/// <c>null</c> values to -1.
/// </summary>
/// <param name="arffFile">Path of the ARFF file to read.</param>
/// <param name="header">Receives the header of the file.</param>
/// <returns>One <c>int[]</c> per instance, in file order.</returns>
private static List<int[]> ReadIntInstances(string arffFile, out ArffHeader header)
{
    List<object[]> rawInstances = new List<object[]>();

    using (ArffReader reader = new ArffReader(arffFile))
    {
        header = reader.ReadHeader();

        for (object[] raw = reader.ReadInstance(); raw != null; raw = reader.ReadInstance())
        {
            rawInstances.Add(raw);
        }
    }

    // Conversion happens only after the whole file has been read and the reader closed.
    List<int[]> converted = new List<int[]>();
    foreach (object[] raw in rawInstances)
    {
        converted.Add(Array.ConvertAll(raw, value => value == null ? -1 : (int)value));
    }

    return converted;
}
/// <summary>
/// Reads the ARFF file at <c>filePath</c>, collects per-attribute metadata, converts the instances
/// into numeric rows, trains a C4.5 tree when that algorithm is selected, and finally derives the
/// rules and the predictions for the training inputs from <c>tree</c>.
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown when the decision variable type is Discrete but an attribute's type text starts with
/// "numeric" or "real", or contains no comma.
/// </exception>
private void Learn()
{
    using (ArffReader arffReader = new ArffReader(filePath))
    {
        header = arffReader.ReadHeader();
        attributes = new Dictionary<string, AttributeListInfo>(); // attribute name -> attribute info
        attributeList = header.Attributes.ToList();

        foreach (var attr in attributeList)
        {
            // The textual form of the type is split on commas; each part becomes one input value.
            string[] typeParts = attr.Type.ToString().Split(',');

            bool notEnumerated = typeParts[0] == "numeric" || typeParts[0] == "real" || typeParts.Length == 1;
            if (decisionVariableType == AttributeType.Discrete && notEnumerated)
            {
                throw new InvalidOperationException("Variable Type Should be Continous, not Discrete");
            }

            // Strip the braces and surrounding whitespace from every part.
            List<string> values = typeParts
                .Select(part => part.Replace("{", "").Replace("}", "").Trim())
                .ToList();

            // Define the information for this attribute, then add it to the dictionary.
            AttributeListInfo info = new AttributeListInfo();
            info.NumberOfInputs = values.Count;
            info.Inputs = values;
            attributes.Add(attr.Name, info);

            // Once all but one attribute have been registered, the current one is only remembered by name.
            if (_attributes.Count >= attributeList.Count - 1)
            {
                lastAttrName = attr.Name;
                continue;
            }

            _attributes.Add(new Attribute(attr.Name, info.NumberOfInputs));
            queryList.Add(new Query(attr.Name, values));
        }

        var featureRows = new List<double[]>();
        var completeRows = new List<double[]>();
        var labels = new List<int>();

        for (object[] instance = arffReader.ReadInstance(); instance != null; instance = arffReader.ReadInstance())
        {
            int width = instance.Length;
            double[] features = new double[width - 1];
            double[] complete = new double[width];

            // Values that cannot be parsed as a double are left at 0.
            for (int i = 0; i < width; i++)
            {
                Double.TryParse(instance[i] + "", out complete[i]);
            }

            // The feature row is the complete row without its last column.
            Array.Copy(complete, features, width - 1);

            featureRows.Add(features);
            completeRows.Add(complete);
            labels.Add(Int32.Parse(instance[width - 1] + ""));
        }

        dInputs = featureRows.ToArray();     // inputs (all columns except the last)
        allDInputs = completeRows.ToArray(); // all columns as doubles
        outputs = labels.ToArray();

        // Decide which algorithm set-up to use based on the options.
        if (learningAlgorithm == LearningAlgorithm.C45Learning)
        {
            bool configured = true;

            if (decisionVariableType == AttributeType.Continuous || decisionVariableType == AttributeType.Both)
            {
                c45 = new C45Algorithm();
            }
            else if (decisionVariableType == AttributeType.Discrete)
            {
                c45 = new C45Algorithm(_attributes.ToArray());
            }
            else
            {
                configured = false;
            }

            if (configured)
            {
                Stopwatch stopwatch = Stopwatch.StartNew();
                tree = c45.Learn(dInputs, outputs); // induce tree from data
                stopwatch.Stop();
                elapsedTime = stopwatch.ElapsedMilliseconds;
            }
        }

        rules = tree.ToRules(attributes, decisionVariableType);
        predicted = tree.ComputeDecision(dInputs);
    }
}