/// <summary>
/// Console entry point: parses the SmallTest.arff file at a hard-coded relative
/// path, writes the parsed record's string form to standard output, and then
/// blocks on a line of console input before returning.
/// </summary>
public static void Main()
{
    const string samplePath = "../../../ArffParser/TestArffFiles/SmallTest.arff";

    var parser = new ArffParser();
    var parsed = parser.Parse(samplePath);

    Console.WriteLine(parsed.ToString());

    // Keeps the console window open until a line is entered.
    Console.ReadLine();
}
/// <summary>
/// Parses the SmallTest.arff fixture and verifies the relation name, the name
/// and type of each of the five attributes, the nominal values of the first
/// attribute, and the number of data rows.
/// </summary>
/// <remarks>
/// Equality checks use Assert.AreEqual(expected, actual) so that a failure
/// reports both values rather than only "Assert.IsTrue failed".
/// </remarks>
public void ParseTest_SmallTestArff()
{
    var arffParser = new ArffParser.ArffParser();
    ArffParser.ArffRecord record = arffParser.Parse("../../TestArffFiles/SmallTest.arff");

    Assert.AreEqual("cars", record.RelationName);
    Assert.AreEqual(5, record.Attributes.Count);

    // @ATTRIBUTE buying { vhigh, high, med, low }
    Assert.AreEqual("buying", record.Attributes[0].Name);
    Assert.AreEqual(ArffParser.AttributeType.CLASS, record.Attributes[0].Type);

    // Cast once; an InvalidCastException here also fails the test, as before.
    var buying = (NominalAttribute)record.Attributes[0];
    Assert.AreEqual(4, buying.Values.Count);
    Assert.IsTrue(buying.Values.Contains("vhigh"));
    Assert.IsTrue(buying.Values.Contains("high"));
    Assert.IsTrue(buying.Values.Contains("med"));
    Assert.IsTrue(buying.Values.Contains("low"));

    // @ATTRIBUTE price NUMERIC
    Assert.AreEqual("price", record.Attributes[1].Name);
    Assert.AreEqual(ArffParser.AttributeType.NUMERIC, record.Attributes[1].Type);

    // @ATTRIBUTE miles numeric
    Assert.AreEqual("miles", record.Attributes[2].Name);
    Assert.AreEqual(ArffParser.AttributeType.NUMERIC, record.Attributes[2].Type);

    // @ATTRIBUTE make STRING
    Assert.AreEqual("make", record.Attributes[3].Name);
    Assert.AreEqual(ArffParser.AttributeType.STRING, record.Attributes[3].Type);

    // @ATTRIBUTE purchaseDate -- expected to parse as a DATE attribute
    Assert.AreEqual("purchaseDate", record.Attributes[4].Name);
    Assert.AreEqual(ArffParser.AttributeType.DATE, record.Attributes[4].Type);

    // @DATA
    Assert.AreEqual(3, record.Data.Count);
}
/// <summary>
/// Reads an ARFF file and parses it into an <see cref="ArffRecord"/> by walking
/// its lines through a three-state machine (HEADER, ATTRIBUTE, DATA).
/// </summary>
/// <remarks>
/// The expected structure is:
/// <code>
/// @relation &lt;identifier&gt;
///
/// @attribute &lt;identifier&gt;
/// &lt;more attributes ...&gt;
///
/// @data
/// &lt;data entries ...&gt;
/// </code>
/// Empty lines, lines starting with COMMENT_TOKEN, and lines that contain only
/// whitespace and/or a comment are skipped in every state.
/// The current file name and line number are stored in instance fields while
/// parsing so that they can be included in any <c>ArffParseException</c>.
/// NOTE(review): everything after the first COMMENT_TOKEN on a line is discarded,
/// wherever it appears, including inside a quoted data value.
/// </remarks>
/// <param name="filename">Path of the ARFF file to read.</param>
/// <returns>
/// A record whose FileName is <paramref name="filename"/> and whose relation name,
/// attributes and data rows are filled from the file. Reaching the end of the file
/// before a later section was seen is not reported; the record is returned as-is.
/// </returns>
/// <exception cref="ArffParseException">
/// The first meaningful line does not contain the relation token, or a line in the
/// attribute section contains neither the attribute token nor the data token.
/// </exception>
public ArffRecord Parse(string filename)
{
    _curParseFileName = filename;
    _curParseLineNumber = 0;

    string[] rawFileLines = System.IO.File.ReadAllLines(filename);
    string[] fileLines = PreprocessFileLines(rawFileLines);

    ArffRecord arffRecord = new ArffRecord();
    arffRecord.FileName = filename;

    ArffState parseState = ArffState.HEADER;

    // _curParseLineNumber is the 1-based position within the preprocessed lines.
    // NOTE(review): whether that matches the raw file's line numbers depends on
    // PreprocessFileLines, which is not visible here.
    for (_curParseLineNumber = 1; _curParseLineNumber <= fileLines.Length; _curParseLineNumber++)
    {
        string fileLine = fileLines[_curParseLineNumber - 1];

        if (fileLine.Length == 0 || fileLine[0] == COMMENT_TOKEN)
        {
            continue; // skip comment lines and new lines
        }

        fileLine = fileLine.Split(COMMENT_TOKEN)[0].Trim(); // remove any trailing comments

        if (fileLine.Length == 0)
        {
            // The line held only whitespace, or whitespace followed by a comment.
            // Treat it like any other blank line instead of feeding an empty
            // string to the state machine below.
            continue;
        }

        switch (parseState)
        {
            case ArffState.HEADER:
                if (fileLine.Contains(RELATION_TOKEN))
                {
                    arffRecord.RelationName = TryParseRelation(fileLine);
                    parseState = ArffState.ATTRIBUTE;
                }
                else
                {
                    // non empty line present in header state that did not contain relation token
                    throw new ArffParseException(_curParseFileName, _curParseLineNumber, "expected @relation declaration");
                }
                break;

            case ArffState.ATTRIBUTE:
                if (fileLine.Contains(ATTRIBUTE_TOKEN))
                {
                    var attribute = TryParseAttribute(fileLine);
                    arffRecord.Attributes.Add(attribute);
                }
                else if (fileLine.Contains(DATA_TOKEN))
                {
                    // The data marker line carries no payload; rows start on the next line.
                    parseState = ArffState.DATA;
                }
                else
                {
                    throw new ArffParseException(_curParseFileName, _curParseLineNumber, "expected @attribute declaration");
                }
                break;

            case ArffState.DATA:
                // Every remaining meaningful line is a data row, interpreted
                // against the attributes collected so far.
                var data = TryParseData(fileLine, arffRecord.Attributes);
                arffRecord.Data.Add(data);
                break;
        }
    }

    return arffRecord;
}