/// <summary>
/// Reads an ARFF file line by line. Header lines (everything before the <c>@data</c> marker)
/// are passed to <see cref="PopulateAttributes"/>; every line after the marker is passed to
/// <c>PopulateSampleData</c>.
/// </summary>
/// <param name="fileName">Name of the file.</param>
/// <param name="isSampleData">
/// Forwarded unchanged to both helpers. When <c>true</c>, the last entry of
/// <c>RemainingAttributes</c> is removed once the whole file has been read.
/// </param>
public static void ReadData(string fileName, bool isSampleData)
{
    // The using block guarantees the file handle is released even if one of the
    // helpers throws while parsing a malformed line.
    using (var file = new System.IO.StreamReader(fileName))
    {
        // Skip the first two lines of the ARFF file because they're neither attributes nor data.
        for (var i = 0; i < 2; i++)
        {
            file.ReadLine();
        }

        Console.WriteLine("start reading attributes");

        var isData = false;
        string line;
        while ((line = file.ReadLine()) != null)
        {
            // ARFF declarations are case-insensitive, and the marker may carry
            // surrounding whitespace, so compare on the trimmed text ignoring case.
            if (line.Trim().Equals("@data", StringComparison.OrdinalIgnoreCase))
            {
                isData = true;

                // The marker itself is not a data row; move on to the first row.
                line = file.ReadLine();
                Console.WriteLine("start reading data");

                if (line == null)
                {
                    // The file ends right after the marker: there are no rows to
                    // hand over, so never pass null to PopulateSampleData.
                    break;
                }
            }

            if (!isData)
            {
                PopulateAttributes(line, isSampleData);
            }
            else
            {
                PopulateSampleData(line, isSampleData);
            }
        }
    }

    // Removing the last attribute because that's the target attribute ("Class").
    if (isSampleData)
    {
        RemainingAttributes.RemoveAt(RemainingAttributes.Count - 1);
    }
}
/// <summary>
/// Parses one attribute declaration of the form <c>@attribute name {v1,v2,...}</c> into an
/// <c>Attribute</c>: the name is the text between the first space and the opening brace, the
/// possible values are the comma-separated entries between the first <c>{</c> and the last
/// <c>}</c>. Single quotes are stripped from the name and from each value via
/// <c>RemoveSingleQuoteIfAny</c>.
/// </summary>
/// <param name="line">The line. Null, empty and whitespace-only lines are ignored.</param>
/// <param name="isSampleData">
/// When <c>true</c>, the parsed attribute is appended to both <c>AllAttributes</c> and
/// <c>RemainingAttributes</c>; when <c>false</c>, the line is still parsed (and validated)
/// but the result is discarded.
/// </param>
/// <exception cref="ArgumentOutOfRangeException">
/// The line does not contain a <c>{...}</c> value list preceded by the attribute name.
/// </exception>
public static void PopulateAttributes(string line, bool isSampleData)
{
    // Blank separator lines are legal in an ARFF header; skip them instead of failing.
    if (line == null || line.Trim().Length == 0)
    {
        return;
    }

    var firstSpaceIndex = line.IndexOf(" ", StringComparison.Ordinal);
    var openBraceIndex = line.IndexOf("{", StringComparison.Ordinal);
    var closeBraceIndex = line.LastIndexOf("}", StringComparison.Ordinal);

    // Without this check the Substring calls below fail with a bare range error that
    // does not mention the offending line. The exception type is kept as
    // ArgumentOutOfRangeException so existing callers observe the same failure type.
    if (openBraceIndex < 0 || closeBraceIndex < openBraceIndex || firstSpaceIndex > openBraceIndex)
    {
        throw new ArgumentOutOfRangeException(
            "line",
            line,
            "Expected an attribute declaration of the form \"@attribute name {value1,value2,...}\".");
    }

    var attribute = new Attribute();

    // Take everything between the first space and the opening brace, then trim.
    // A fixed "- 2" length would assume exactly one separator character before the
    // brace, cutting the last character of the name when there is none and leaving
    // padding behind when there are several.
    attribute.AttributeName = line
        .Substring(firstSpaceIndex + 1, openBraceIndex - firstSpaceIndex - 1)
        .Trim()
        .RemoveSingleQuoteIfAny();

    // Parsing the values by removing the {} and then splitting by comma.
    // Values are deliberately not trimmed here so they keep matching whatever
    // the data-row parser produces for the same text.
    var valueParts = line
        .Substring(openBraceIndex + 1, closeBraceIndex - 1 - openBraceIndex)
        .Split(',');
    attribute.PossibleValues.AddRange(valueParts.Select(value => value.RemoveSingleQuoteIfAny()).ToList());

    if (isSampleData)
    {
        AllAttributes.Add(attribute);
        RemainingAttributes.Add(attribute);
    }
}