Пример #1
0
        /// <summary>
        /// Console entry point: parses the bundled SmallTest.arff sample, prints the
        /// resulting record's string form, and then waits for a line of input so the
        /// console output stays visible.
        /// </summary>
        public static void Main()
        {
            // The sample file is addressed by a relative path.
            var parser = new ArffParser();
            var parsedRecord = parser.Parse("../../../ArffParser/TestArffFiles/SmallTest.arff");

            string rendered = parsedRecord.ToString();
            Console.WriteLine(rendered);

            // Block until the user presses Enter.
            Console.ReadLine();
        }
Пример #2
0
        /// <summary>
        /// Parses the SmallTest.arff fixture and verifies the relation name, the five
        /// declared attributes (name and type, plus the nominal value set of the first
        /// one), and the number of data rows.
        /// </summary>
        public void ParseTest_SmallTestArff()
        {
            var arffParser = new ArffParser.ArffParser();

            ArffParser.ArffRecord record = arffParser.Parse("../../TestArffFiles/SmallTest.arff");

            // AreEqual (rather than IsTrue(a == b)) reports expected vs. actual on failure.
            Assert.AreEqual("cars", record.RelationName);
            Assert.AreEqual(5, record.Attributes.Count);

            // @ATTRIBUTE	buying       { vhigh, high, med, low }
            Assert.AreEqual("buying", record.Attributes[0].Name);
            Assert.AreEqual(ArffParser.AttributeType.CLASS, record.Attributes[0].Type);

            // Cast once, after the name/type checks, so a wrong attribute kind is
            // reported by those assertions before the cast can throw.
            var buying = (NominalAttribute)record.Attributes[0];
            Assert.AreEqual(4, buying.Values.Count);
            Assert.IsTrue(buying.Values.Contains("vhigh"));
            Assert.IsTrue(buying.Values.Contains("high"));
            Assert.IsTrue(buying.Values.Contains("med"));
            Assert.IsTrue(buying.Values.Contains("low"));

            // @ATTRIBUTE price NUMERIC
            Assert.AreEqual("price", record.Attributes[1].Name);
            Assert.AreEqual(ArffParser.AttributeType.NUMERIC, record.Attributes[1].Type);

            // @ATTRIBUTE miles numeric
            Assert.AreEqual("miles", record.Attributes[2].Name);
            Assert.AreEqual(ArffParser.AttributeType.NUMERIC, record.Attributes[2].Type);

            // @ATTRIBUTE make STRING
            Assert.AreEqual("make", record.Attributes[3].Name);
            Assert.AreEqual(ArffParser.AttributeType.STRING, record.Attributes[3].Type);

            // @ATTRIBUTE purchaseDate DATE
            // NOTE(review): this comment previously said STRING while the assertion
            // expects DATE; confirm the declaration in SmallTest.arff.
            Assert.AreEqual("purchaseDate", record.Attributes[4].Name);
            Assert.AreEqual(ArffParser.AttributeType.DATE, record.Attributes[4].Type);

            // @DATA
            Assert.AreEqual(3, record.Data.Count);
        }
Пример #3
0
        /// <summary>
        /// Reads the file at <paramref name="filename"/> and parses it into an
        /// <see cref="ArffRecord"/> using a three-state machine that follows the
        /// expected ARFF layout.
        /// </summary>
        /// <remarks>
        /// Expected structure:
        /// <list type="number">
        /// <item><description>HEADER: a relation declaration (line containing RELATION_TOKEN).</description></item>
        /// <item><description>ATTRIBUTE: one or more attribute declarations (lines containing
        /// ATTRIBUTE_TOKEN), ended by a line containing DATA_TOKEN.</description></item>
        /// <item><description>DATA: every remaining non-empty line is a data entry.</description></item>
        /// </list>
        /// Blank lines, comment-only lines, and trailing comments are ignored. The current
        /// file name and 1-based line number (an index into the preprocessed lines) are kept
        /// in <c>_curParseFileName</c> / <c>_curParseLineNumber</c> while parsing.
        /// </remarks>
        /// <param name="filename">Path of the ARFF file to read.</param>
        /// <returns>
        /// The populated record. If the input ends before the data section is reached,
        /// whatever was parsed up to that point is returned.
        /// </returns>
        /// <exception cref="ArffParseException">
        /// A non-empty line in the header state lacks the relation token, or a non-empty
        /// line in the attribute state contains neither the attribute nor the data token.
        /// </exception>
        public ArffRecord Parse(string filename)
        {
            _curParseFileName   = filename;
            _curParseLineNumber = 0;

            string[] rawFileLines = System.IO.File.ReadAllLines(filename);
            string[] fileLines    = PreprocessFileLines(rawFileLines);

            ArffRecord arffRecord = new ArffRecord();
            arffRecord.FileName = filename;

            ArffState parseState = ArffState.HEADER;

            for (_curParseLineNumber = 1; _curParseLineNumber <= fileLines.Length; _curParseLineNumber++)
            {
                // Drop everything from the first comment token onward, then trim.
                // NOTE(review): this also cuts at a comment token that appears inside a
                // quoted value; confirm whether PreprocessFileLines guards against that.
                string fileLine = fileLines[_curParseLineNumber - 1].Split(COMMENT_TOKEN)[0].Trim();

                // Skip AFTER stripping/trimming: this covers empty lines and lines that
                // start with the comment token, and additionally whitespace-only lines and
                // indented comments, which previously reached the state machine as "".
                if (fileLine.Length == 0)
                {
                    continue;
                }

                switch (parseState)
                {
                case ArffState.HEADER:
                    if (!fileLine.Contains(RELATION_TOKEN))
                    {
                        // non empty line present in header state that did not contain relation token
                        throw new ArffParseException(_curParseFileName, _curParseLineNumber, "expected @relation declaration");
                    }
                    arffRecord.RelationName = TryParseRelation(fileLine);
                    parseState = ArffState.ATTRIBUTE;
                    break;

                case ArffState.ATTRIBUTE:
                    if (fileLine.Contains(ATTRIBUTE_TOKEN))
                    {
                        arffRecord.Attributes.Add(TryParseAttribute(fileLine));
                    }
                    else if (fileLine.Contains(DATA_TOKEN))
                    {
                        // the data marker ends the attribute section; following lines are data rows
                        parseState = ArffState.DATA;
                    }
                    else
                    {
                        throw new ArffParseException(_curParseFileName, _curParseLineNumber, "expected @attribute declaration");
                    }
                    break;

                case ArffState.DATA:
                    // each data row is parsed against the attributes declared so far
                    arffRecord.Data.Add(TryParseData(fileLine, arffRecord.Attributes));
                    break;
                }
            }

            return arffRecord;
        }