/// <summary> Reads and stores header of an ARFF file.
///
/// The header consists of the relation keyword and name, followed by one
/// attribute declaration per line, followed by the data keyword. Keywords
/// and attribute type names are matched case-insensitively using an
/// ordinal comparison, so the result does not depend on the current
/// culture.
/// </summary>
/// <param name="tokenizer">the stream tokenizer
/// </param>
/// <exception cref="IOException">if the information is not read
/// successfully
/// </exception>
protected internal virtual void readHeader(StreamTokenizer tokenizer)
{
    // Ordinal comparison avoids culture-specific casing rules (for example
    // the Turkish dotted/dotless i) that break ToUpper()-based matching.
    const System.StringComparison ignoreCase = System.StringComparison.OrdinalIgnoreCase;

    System.String attributeName;
    FastVector attributeValues;
    Token token = null;

    // Get name of relation.
    getFirstToken(tokenizer, out token);
    if (token is EofToken)
    {
        errms(tokenizer, "premature end of file");
    }
    if (System.String.Equals(ARFF_RELATION, token.StringValue, ignoreCase))
    {
        getNextToken(tokenizer, out token);
        m_RelationName = token.StringValue;
        getLastToken(tokenizer, out token, false);
    }
    else
    {
        errms(tokenizer, "keyword " + ARFF_RELATION + " expected");
    }

    // Create vectors to hold information temporarily.
    m_Attributes = new FastVector();

    // Get attribute declarations.
    getFirstToken(tokenizer, out token);
    if (token is EofToken)
    {
        errms(tokenizer, "premature end of file");
    }

    while (System.String.Equals(Attribute.ARFF_ATTRIBUTE, token.StringValue, ignoreCase))
    {
        // Get attribute name.
        getNextToken(tokenizer, out token);
        attributeName = token.StringValue;
        getNextToken(tokenizer, out token);

        // A word token names a built-in type; anything else is treated as
        // the start of a nominal value enumeration.
        if (token is WordToken)
        {
            System.String typeName = token.StringValue;

            if (System.String.Equals(typeName, Attribute.ARFF_ATTRIBUTE_REAL, ignoreCase)
                || System.String.Equals(typeName, Attribute.ARFF_ATTRIBUTE_INTEGER, ignoreCase)
                || System.String.Equals(typeName, Attribute.ARFF_ATTRIBUTE_NUMERIC, ignoreCase))
            {
                // Attribute is real, integer, or numeric.
                m_Attributes.addElement(new Attribute(attributeName, numAttributes()));
                readTillEOL(tokenizer);
            }
            else if (System.String.Equals(typeName, Attribute.ARFF_ATTRIBUTE_STRING, ignoreCase))
            {
                // Attribute is a string attribute (no predefined values).
                m_Attributes.addElement(new Attribute(attributeName, (FastVector) null, numAttributes()));
                readTillEOL(tokenizer);
            }
            else if (System.String.Equals(typeName, Attribute.ARFF_ATTRIBUTE_DATE, ignoreCase))
            {
                // Attribute is a date; a format string may optionally follow.
                System.String format = null;
                tokenizer.NextToken(out token);

                // The line (or the file) ending right after the type name
                // means no format was supplied; only other tokens are
                // candidates for a format string.
                if ((token != null) && !(token is EolToken) && !(token is EofToken))
                {
                    // NOTE(review): the quote checks compare StringValue to a
                    // bare quote character; how the tokenizer represents a
                    // quoted format string cannot be confirmed from here.
                    if (!(token is WordToken) && (token.StringValue != "'") && (token.StringValue != "\""))
                    {
                        errms(tokenizer, "not a valid date format");
                    }
                    format = token.StringValue;
                    readTillEOL(tokenizer);
                }
                else
                {
                    // Leave the terminating token for getLastToken below.
                    tokenizer.PushBack(token);
                }
                m_Attributes.addElement(new Attribute(attributeName, format, numAttributes()));
            }
            else
            {
                errms(tokenizer, "no valid attribute type or invalid enumeration");
            }
        }
        else
        {
            // Attribute is nominal.
            attributeValues = new FastVector();
            tokenizer.PushBack(token);

            // Get values for nominal attribute.
            tokenizer.NextToken(out token);
            if (token.StringValue != "{")
            {
                errms(tokenizer, "{ expected at beginning of enumeration");
            }
            tokenizer.NextToken(out token);
            while (token.StringValue != "}")
            {
                // Reaching the end of the line or of the file before the
                // closing brace is an error; checking for end of file keeps
                // the loop from running on after the input is exhausted.
                if ((token is EolToken) || (token is EofToken))
                {
                    errms(tokenizer, "} expected at end of enumeration");
                }
                else
                {
                    attributeValues.addElement(token.StringValue);
                }
                tokenizer.NextToken(out token);
            }
            if (attributeValues.size() == 0)
            {
                errms(tokenizer, "no nominal values found");
            }
            m_Attributes.addElement(new Attribute(attributeName, attributeValues, numAttributes()));
        }

        // Finish the current declaration line and fetch the first token of
        // the next one.
        getLastToken(tokenizer, out token, false);
        getFirstToken(tokenizer, out token);
        if (token is EofToken)
        {
            errms(tokenizer, "premature end of file");
        }
    }

    // Check if data part follows. We can't easily check for EOL.
    if (!System.String.Equals(ARFF_DATA, token.StringValue, ignoreCase))
    {
        errms(tokenizer, "keyword " + ARFF_DATA + " expected");
    }

    // Check if any attributes have been declared.
    if (m_Attributes.size() == 0)
    {
        errms(tokenizer, "no attributes declared");
    }

    // Allocate buffers in case sparse instances have to be read.
    m_ValueBuffer = new double[numAttributes()];
    m_IndicesBuffer = new int[numAttributes()];
}
/// <summary> Reads and skips all tokens before next end of line token.
///
/// The token that stops the scan is pushed back onto the tokenizer so the
/// caller can read it again. The scan also stops at an end of file token,
/// or when the tokenizer hands back no token at all, so that input lacking
/// a final end of line cannot keep this method looping.
/// </summary>
/// <param name="tokenizer">the stream tokenizer
/// </param>
protected internal virtual void readTillEOL(StreamTokenizer tokenizer)
{
    Token token;
    do
    {
        tokenizer.NextToken(out token);
    }
    while ((token != null) && !(token is EolToken) && !(token is EofToken));

    // Only a real token can be pushed back; a null result means there is
    // nothing to return to the tokenizer.
    if (token != null)
    {
        tokenizer.PushBack(token);
    }
}
/// <summary> Reads a single instance using the tokenizer.
///
/// The first token of the instance is inspected and pushed back, and the
/// actual parsing is then delegated: an opening curly brace selects
/// getInstanceSparse, anything else selects getInstanceFull.
/// </summary>
/// <param name="tokenizer">the tokenizer to be used
/// </param>
/// <param name="flag">if method should test for carriage return after
/// each instance; forwarded unchanged to the delegated reader
/// </param>
/// <returns> false if end of file has been reached, otherwise the result
/// of the delegated reader
/// </returns>
/// <exception cref="IOException">if the information is not read
/// successfully
/// </exception>
protected internal virtual bool getInstance(StreamTokenizer tokenizer, bool flag)
{
    // Instances cannot be parsed without attribute declarations.
    bool headerMissing = m_Attributes.size() == 0;
    if (headerMissing)
    {
        errms(tokenizer, "no header information available");
    }

    // Peek at the first token, then hand it back so the delegated reader
    // sees the instance from its beginning.
    Token lookahead;
    getFirstToken(tokenizer, out lookahead);
    tokenizer.PushBack(lookahead);

    // Nothing left to read.
    if (lookahead is EofToken)
    {
        return false;
    }

    // An opening brace marks the sparse instance notation.
    bool sparseNotation = (lookahead is CharToken) && (lookahead.StringValue == "{");
    return sparseNotation
        ? getInstanceSparse(tokenizer, flag)
        : getInstanceFull(tokenizer, flag);
}