Exemplo n.º 1
0
		/// <summary> Reads and stores the header of an ARFF file: the relation
		/// declaration, followed by the attribute declarations, followed by the
		/// ARFF_DATA keyword. The parsed attributes are stored in m_Attributes
		/// and the sparse-instance buffers are sized to match.
		/// 
		/// </summary>
		/// <param name="tokenizer">the stream tokenizer
		/// </param>
		/// <exception cref="IOException">if the information is not read 
		/// successfully
		/// </exception>
		protected internal virtual void  readHeader(StreamTokenizer tokenizer)
		{
			Token token = null;
			
			// Get name of relation.
			getFirstToken(tokenizer, out token);
			if (token is EofToken)
			{
				errms(tokenizer, "premature end of file");
			}
			if (matchesKeyword(token, ARFF_RELATION))
			{
				getNextToken(tokenizer, out token);
				m_RelationName = token.StringValue;
				getLastToken(tokenizer, out token, false);
			}
			else
			{
				errms(tokenizer, "keyword " + ARFF_RELATION + " expected");
			}
			
			// Create vectors to hold information temporarily.
			m_Attributes = new FastVector();
			
			// Get attribute declarations.
			getFirstToken(tokenizer, out token);
			if (token is EofToken)
			{
				errms(tokenizer, "premature end of file");
			}
			
			while (matchesKeyword(token, Attribute.ARFF_ATTRIBUTE))
			{
				// Get attribute name, then the token that follows it (type keyword
				// or the start of a nominal enumeration).
				getNextToken(tokenizer, out token);
				System.String attributeName = token.StringValue;
				getNextToken(tokenizer, out token);
				
				// A word token means a type keyword; anything else is treated as nominal.
				if (token is WordToken)
				{
					// Attribute is real, integer, or numeric.
					if (matchesKeyword(token, Attribute.ARFF_ATTRIBUTE_REAL)
						|| matchesKeyword(token, Attribute.ARFF_ATTRIBUTE_INTEGER)
						|| matchesKeyword(token, Attribute.ARFF_ATTRIBUTE_NUMERIC))
					{
						m_Attributes.addElement(new Attribute(attributeName, numAttributes()));
						readTillEOL(tokenizer);
					}
					else if (matchesKeyword(token, Attribute.ARFF_ATTRIBUTE_STRING))
					{
						m_Attributes.addElement(new Attribute(attributeName, (FastVector) null, numAttributes()));
						readTillEOL(tokenizer);
					}
					else if (matchesKeyword(token, Attribute.ARFF_ATTRIBUTE_DATE))
					{
						// The date format is optional: it is present only when something
						// other than the end of the line follows the type keyword.
						System.String format = null;
						tokenizer.NextToken(out token);
						if ((token != null) && !(token is EolToken))
						{
							// NOTE(review): a quoted format is accepted here only if the
							// tokenizer reports it as a WordToken or as a bare quote
							// character - confirm against the tokenizer's quote handling.
							if (!(token is WordToken) && (token.StringValue != "'") && (token.StringValue != "\""))
							{
								errms(tokenizer, "not a valid date format");
							}
							format = token.StringValue;
							readTillEOL(tokenizer);
						}
						else
						{
							// No format given: push the token back so the getLastToken
							// call below reads it.
							tokenizer.PushBack(token);
						}
						m_Attributes.addElement(new Attribute(attributeName, format, numAttributes()));
					}
					else
					{
						errms(tokenizer, "no valid attribute type or invalid " + "enumeration");
					}
				}
				else
				{
					// Attribute is nominal.
					FastVector attributeValues = new FastVector();
					tokenizer.PushBack(token);
					
					// Get values for nominal attribute.
					tokenizer.NextToken(out token);
					if (token.StringValue != "{")
					{
						errms(tokenizer, "{ expected at beginning of enumeration");
					}
					tokenizer.NextToken(out token);
					while (token.StringValue != "}")
					{
						// The enumeration must be closed on the same line. End of file is
						// reported the same way so that an unterminated enumeration at the
						// end of the input cannot keep this loop running.
						if ((token is EolToken) || (token is EofToken))
						{
							errms(tokenizer, "} expected at end of enumeration");
						}
						else
						{
							attributeValues.addElement(token.StringValue);
						}
						
						tokenizer.NextToken(out token);
					}
					if (attributeValues.size() == 0)
					{
						errms(tokenizer, "no nominal values found");
					}
					m_Attributes.addElement(new Attribute(attributeName, attributeValues, numAttributes()));
				}
				
				// Finish the current declaration line and fetch the first token of the next one.
				getLastToken(tokenizer, out token, false);
				getFirstToken(tokenizer, out token);
				if (token is EofToken)
				{
					errms(tokenizer, "premature end of file");
				}
			}
			
			// Check if data part follows. We can't easily check for EOL.
			if (!matchesKeyword(token, ARFF_DATA))
			{
				errms(tokenizer, "keyword " + ARFF_DATA + " expected");
			}
			
			// Check if any attributes have been declared.
			if (m_Attributes.size() == 0)
			{
				errms(tokenizer, "no attributes declared");
			}
			
			// Allocate buffers in case sparse instances have to be read
			m_ValueBuffer = new double[numAttributes()];
			m_IndicesBuffer = new int[numAttributes()];
		}
		
		/// <summary> Tests whether the text of a token equals a keyword, ignoring case.
		/// The comparison is ordinal, so the result does not depend on the
		/// current culture.
		/// 
		/// </summary>
		/// <param name="token">the token to test; may be null
		/// </param>
		/// <param name="keyword">the keyword to compare against
		/// </param>
		/// <returns> true if the token is not null and its string value matches
		/// the keyword
		/// </returns>
		private static bool matchesKeyword(Token token, System.String keyword)
		{
			return (token != null) && System.String.Equals(token.StringValue, keyword, System.StringComparison.OrdinalIgnoreCase);
		}
Exemplo n.º 2
0
		/// <summary> Reads and skips all tokens before the next end of line token,
		/// or before the end of file token if the input ends first. The token
		/// that stopped the scan is pushed back onto the tokenizer.
		/// 
		/// </summary>
		/// <param name="tokenizer">the stream tokenizer
		/// </param>
		protected internal virtual void  readTillEOL(StreamTokenizer tokenizer)
		{
			Token token;
			
			// Stop on end of file as well as end of line: a last line without a
			// trailing line break would otherwise never satisfy the loop condition.
			do
			{
				tokenizer.NextToken(out token);
			}
			while (!(token is EolToken) && !(token is EofToken));
			
			tokenizer.PushBack(token);
		}
Exemplo n.º 3
0
		/// <summary> Reads one instance with the given tokenizer by delegating to
		/// the sparse or the full instance reader, depending on whether the
		/// instance starts with an opening brace.
		/// 
		/// </summary>
		/// <param name="tokenizer">the tokenizer to be used
		/// </param>
		/// <param name="flag">passed through unchanged to the sparse or full
		/// instance reader
		/// </param>
		/// <returns> false if the first token is the end of file token; otherwise
		/// the result of the sparse or full instance reader
		/// </returns>
		/// <exception cref="IOException">if the information is not read 
		/// successfully
		/// </exception>
		protected internal virtual bool getInstance(StreamTokenizer tokenizer, bool flag)
		{
			// Without declared attributes there is no header to read against.
			if (m_Attributes.size() == 0)
			{
				errms(tokenizer, "no header information available");
			}
			
			// Look at the first token of the instance, then push it back so the
			// delegated reader starts from the same position.
			Token firstToken;
			getFirstToken(tokenizer, out firstToken);
			tokenizer.PushBack(firstToken);
			
			// Nothing left to read.
			if (firstToken is EofToken)
			{
				return false;
			}
			
			// An opening brace introduces the sparse format.
			bool startsWithBrace = (firstToken is CharToken) && (firstToken.StringValue == "{");
			return startsWithBrace
				? getInstanceSparse(tokenizer, flag)
				: getInstanceFull(tokenizer, flag);
		}