Example #1
		/// <summary> Loads a cost matrix in the old format from a reader. Adapted from code once sitting 
		/// in Instances.java
		/// 
		/// </summary>
		/// <param name="reader">the reader to get the values from.
		/// </param>
		/// <exception cref="Exception">if the matrix cannot be read correctly.
		/// </exception>
		public virtual void  readOldFormat(System.IO.StreamReader reader)
		{
			
			StreamTokenizer tokenizer;
			Token currentToken;
			double firstIndex, secondIndex, weight;
			
			tokenizer = new StreamTokenizer(reader);
			
			initialize();
			
			tokenizer.Settings.CommentChar('%');
			tokenizer.Settings.GrabEol=true;
            tokenizer.NextToken(out currentToken);
			//while (SupportClass.StreamTokenizerSupport.TT_EOF != (currentToken = tokenizer.NextToken()))
            while (!(currentToken is EofToken))
			{
				
				// Skip empty lines 
				//if (currentToken == SupportClass.StreamTokenizerSupport.TT_EOL)
                if (currentToken is EolToken)
				{
					// Fetch the next token before continuing, otherwise the loop
					// would spin forever on the same end-of-line token.
					tokenizer.NextToken(out currentToken);
					continue;
				}
				
				// Get index of first class.
				//if (currentToken != SupportClass.StreamTokenizerSupport.TT_NUMBER)
                if (!((currentToken is FloatToken)|| (currentToken is IntToken)))
				{
					throw new System.Exception("Only numbers and comments allowed " + "in cost file!");
				}
				//firstIndex = tokenizer.nval;
                firstIndex = Convert.ToDouble(currentToken.StringValue);
				// The class index must be a whole number.
				if (!Utils.eq((double) ((int) firstIndex), firstIndex))
				{
					throw new System.Exception("First number in line has to be " + "index of a class!");
				}
				if ((int) firstIndex >= size())
				{
					throw new System.Exception("Class index out of range!");
				}
				
				// Get index of second class.
				//if (SupportClass.StreamTokenizerSupport.TT_EOF == (currentToken = tokenizer.NextToken()))
                tokenizer.NextToken(out currentToken);
                if (currentToken is EofToken)
				{
					throw new System.Exception("Premature end of file!");
				}
				//if (currentToken == SupportClass.StreamTokenizerSupport.TT_EOL)
                if (currentToken is EolToken)
				{
					throw new System.Exception("Premature end of line!");
				}
				//if (currentToken != SupportClass.StreamTokenizerSupport.TT_NUMBER)
                if  (!((currentToken is IntToken) || (currentToken is FloatToken)))
				{
					throw new System.Exception("Only numbers and comments allowed " + "in cost file!");
				}
				//secondIndex = tokenizer.nval;
                secondIndex = Convert.ToDouble(currentToken.StringValue);
				// The class index must be a whole number.
				if (!Utils.eq((double) ((int) secondIndex), secondIndex))
				{
					throw new System.Exception("Second number in line has to be " + "index of a class!");
				}
				if ((int) secondIndex >= size())
				{
					throw new System.Exception("Class index out of range!");
				}
				if ((int) secondIndex == (int) firstIndex)
				{
					throw new System.Exception("Diagonal of cost matrix non-zero!");
				}
				
				// Get cost factor.

                tokenizer.NextToken(out currentToken);
				//if (SupportClass.StreamTokenizerSupport.TT_EOF == (currentToken = tokenizer.NextToken()))
                if (currentToken is EofToken)
				{
					throw new System.Exception("Premature end of file!");
				}
				//if (currentToken == SupportClass.StreamTokenizerSupport.TT_EOL)
                if (currentToken is EolToken)
				{
					throw new System.Exception("Premature end of line!");
				}

				//if (currentToken != SupportClass.StreamTokenizerSupport.TT_NUMBER)
                if (!((currentToken is IntToken) || (currentToken is FloatToken)))
				{
					throw new System.Exception("Only numbers and comments allowed " + "in cost file!");
				}
                weight = Convert.ToDouble(currentToken.StringValue);
				if (!Utils.gr(weight, 0))
				{
					throw new System.Exception("Only positive weights allowed!");
				}
				setXmlElement((int) firstIndex, (int) secondIndex, weight);

                tokenizer.NextToken(out currentToken);
			}
		}
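A minimal usage sketch (not part of the original listing): it assumes a CostMatrix class that exposes the readOldFormat method above and a constructor taking the number of classes, and it illustrates the old cost-file format, where '%' starts a comment and each data line holds a "rowIndex columnIndex weight" triple.

		public static void ReadOldFormatExample()
		{
			string costFile =
				"% cost of predicting class 1 when the true class is 0\n" +
				"0 1 2.0\n" +
				"1 0 1.0\n";

			// Hypothetical constructor: a 2 x 2 cost matrix initialized with a zero diagonal.
			CostMatrix matrix = new CostMatrix(2);
			using (System.IO.StreamReader reader = new System.IO.StreamReader(
				new System.IO.MemoryStream(System.Text.Encoding.UTF8.GetBytes(costFile))))
			{
				matrix.readOldFormat(reader);   // sets the two off-diagonal entries
			}
		}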
Example #2
		/// <summary> Gets the next token, checking for a premature end of line.
		/// 
		/// </summary>
		/// <param name="tokenizer">the stream tokenizer
		/// </param>
		/// <exception cref="IOException">if it finds a premature end of line
		/// </exception>
		protected internal virtual void  getNextToken(StreamTokenizer tokenizer,out Token token)
		{
            tokenizer.NextToken(out token);
			if (token is EolToken)
			{
				errms(tokenizer, "premature end of line");
			}
            if (token is EofToken)
			{
				errms(tokenizer, "premature end of file");
			}
			//else if ((tokenizer.ttype == '\'') || (tokenizer.ttype == '"'))
            else if ((token is CharToken) && ((token.StringValue == "'") || (token.StringValue == "\"") ))
			{
				//tokenizer.ttype = SupportClass.StreamTokenizerSupport.TT_WORD;
			}
			//else if ((tokenizer.ttype == SupportClass.StreamTokenizerSupport.TT_WORD) && (tokenizer.sval.Equals("?")))
            else if ((token is CharToken) && (token.StringValue == "?"))
			{
				//tokenizer.ttype = '?';
			}
		}
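A small, self-contained illustration (not part of the original listing) of the token classes the helpers above switch on. It uses only members that already appear in this listing: the StreamTokenizer(StreamReader) constructor, Settings.GrabEol, NextToken(out Token), and Token.StringValue; whether a given input comes back as an IntToken, FloatToken, WordToken or CharToken depends on the tokenizer's settings.

		public static void ClassifyTokensExample()
		{
			string text = "@relation weather\n42 3.14\n";
			StreamTokenizer tokenizer = new StreamTokenizer(new System.IO.StreamReader(
				new System.IO.MemoryStream(System.Text.Encoding.UTF8.GetBytes(text))));
			tokenizer.Settings.GrabEol = true;   // report ends of line as EolToken

			Token token;
			tokenizer.NextToken(out token);
			while (!(token is EofToken))
			{
				if (token is EolToken)
					System.Console.WriteLine("<end of line>");
				else if ((token is IntToken) || (token is FloatToken))
					System.Console.WriteLine("number: " + token.StringValue);
				else if (token is WordToken)
					System.Console.WriteLine("word:   " + token.StringValue);
				else
					System.Console.WriteLine("other:  " + token.StringValue);
				tokenizer.NextToken(out token);
			}
		}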
Example #3
		/// <summary> Reads and skips all tokens up to the next end-of-line token.
		/// 
		/// </summary>
		/// <param name="tokenizer">the stream tokenizer
		/// </param>
		protected internal virtual void  readTillEOL(StreamTokenizer tokenizer)
		{

            Token token;
            tokenizer.NextToken(out token);
			while (!(token is EolToken))
			{
                tokenizer.NextToken(out token);
			}
			tokenizer.PushBack(token);
		}
Example #4
		/// <summary> Gets a token and checks whether it is an end of line.
		/// 
		/// </summary>
		/// <param name="tokenizer">the stream tokenizer
		/// </param>
		/// <exception cref="IOException">if it doesn't find an end of line
		/// </exception>
		protected internal virtual void  getLastToken(StreamTokenizer tokenizer,out Token token , bool endOfFileOk)
		{
			tokenizer.NextToken(out token);
			if ( (!(token is EolToken)) && ( (!(token is EofToken))  || !endOfFileOk))
			{
				errms(tokenizer, "end of line expected");
			}
		}
Example #5
		/// <summary> Gets an index, checking for a premature end of line.
		/// 
		/// </summary>
		/// <param name="tokenizer">the stream tokenizer
		/// </param>
		/// <exception cref="IOException">if it finds a premature end of line
		/// </exception>
		protected internal virtual void  getIndex(StreamTokenizer tokenizer, out Token token)
		{
			tokenizer.NextToken(out token);
            if (token is EolToken)
			{
				errms(tokenizer, "premature end of line");
			}
            if (token is EofToken)
			{
				errms(tokenizer, "premature end of file");
			}
		}
Example #6
		/// <summary> Gets next token, skipping empty lines.
		/// 
		/// </summary>
		/// <param name="tokenizer">the stream tokenizer
		/// </param>
		/// <exception cref="IOException">if reading the next token fails
		/// </exception>
		protected internal virtual void  getFirstToken(StreamTokenizer tokenizer,out Token token)
		{
			
			//while (tokenizer.NextToken() == SupportClass.StreamTokenizerSupport.TT_EOL)
            tokenizer.NextToken(out token);
            while(token is EolToken)
			{
                tokenizer.NextToken(out token);
			}

			//if ((tokenizer.ttype == '\'') || (tokenizer.ttype == '"'))
            //if ((token.StringValue == "'") || (token.StringValue == "\"") )
			//{
				//tokenizer.ttype = SupportClass.StreamTokenizerSupport.TT_WORD;
			//}
			//else if ((tokenizer.ttype == SupportClass.StreamTokenizerSupport.TT_WORD) && (tokenizer.sval.Equals("?")))
			//{
			//	tokenizer.ttype = '?';
			//}
		}
Example #7
		/// <summary> Reads and stores header of an ARFF file.
		/// 
		/// </summary>
		/// <param name="tokenizer">the stream tokenizer
		/// </param>
		/// <exception cref="IOException">if the information is not read 
		/// successfully
		/// </exception>
		protected internal virtual void  readHeader(StreamTokenizer tokenizer)
		{
			
			System.String attributeName;
			FastVector attributeValues;
			//int i;
            Token token=null;
			// Get name of relation.
			getFirstToken(tokenizer, out token);
			//if (tokenizer.ttype == SupportClass.StreamTokenizerSupport.TT_EOF)
            if ((token != null)   && (token is EofToken))
			{
				errms(tokenizer, "premature end of file");
			}
			if (ARFF_RELATION.ToUpper().Equals(token.StringValue.ToUpper()))
			{
				getNextToken(tokenizer,out token);
				m_RelationName = token.StringValue;
				getLastToken(tokenizer,out token, false);
			}
			else
			{
				errms(tokenizer, "keyword " + ARFF_RELATION + " expected");
			}
			
			// Create vectors to hold information temporarily.
			m_Attributes = new FastVector();
			
			// Get attribute declarations.
			getFirstToken(tokenizer, out token);
			//if (tokenizer.ttype == SupportClass.StreamTokenizerSupport.TT_EOF)
            if ((token != null) && (token is EofToken))
			{
				errms(tokenizer, "premature end of file");
			}
			
			while (Attribute.ARFF_ATTRIBUTE.ToUpper().Equals(token.StringValue.ToUpper()))
			{
				
				// Get attribute name.
				getNextToken(tokenizer,out token);
				attributeName = token.StringValue;
				getNextToken(tokenizer,out token);
				
				// Check if attribute is nominal.
				//if (tokenizer.ttype == SupportClass.StreamTokenizerSupport.TT_WORD)
                if ((token != null) && (token is WordToken))
				{
					
					// Attribute is real, integer, or string.
                    if (token.StringValue.ToUpper().Equals(Attribute.ARFF_ATTRIBUTE_REAL.ToUpper()) || token.StringValue.ToUpper().Equals(Attribute.ARFF_ATTRIBUTE_INTEGER.ToUpper()) || token.StringValue.ToUpper().Equals(Attribute.ARFF_ATTRIBUTE_NUMERIC.ToUpper()))
					{
						m_Attributes.addElement(new Attribute(attributeName, numAttributes()));
						readTillEOL(tokenizer);
					}
                    else if (token.StringValue.ToUpper().Equals(Attribute.ARFF_ATTRIBUTE_STRING.ToUpper()))
					{
						m_Attributes.addElement(new Attribute(attributeName, (FastVector) null, numAttributes()));
						readTillEOL(tokenizer);
					}
                    else if (token.StringValue.ToUpper().Equals(Attribute.ARFF_ATTRIBUTE_DATE.ToUpper()))
					{
						System.String format = null;
                        tokenizer.NextToken(out token);
						//if (tokenizer.NextToken() != SupportClass.StreamTokenizerSupport.TT_EOL)
                        if ((token != null) && (!(token is EolToken)))
						{
							//if ((tokenizer.ttype != SupportClass.StreamTokenizerSupport.TT_WORD) && (tokenizer.ttype != '\'') && (tokenizer.ttype != '\"'))
                            if ((token != null) && (!(token is WordToken)) && (token.StringValue!="'") && (token.StringValue!="\"") )
							{
								errms(tokenizer, "not a valid date format");
							}
							format = token.StringValue;
							readTillEOL(tokenizer);
						}
						else
						{
							tokenizer.PushBack(token);
						}
						m_Attributes.addElement(new Attribute(attributeName, format, numAttributes()));
					}
					else
					{
						errms(tokenizer, "no valid attribute type or invalid " + "enumeration");
					}
				}
				else
				{
					
					// Attribute is nominal.
					attributeValues = new FastVector();
					tokenizer.PushBack(token);
					
					// Get values for nominal attribute.
                    tokenizer.NextToken(out token);
					if ( token.StringValue != "{")
					{
						errms(tokenizer, "{ expected at beginning of enumeration");
					}
                    tokenizer.NextToken(out token);
					while ( token.StringValue != "}")
					{
						//if (tokenizer.ttype == SupportClass.StreamTokenizerSupport.TT_EOL)
                        if (token is EolToken)
						{
							errms(tokenizer, "} expected at end of enumeration");
						}
						else
						{
							attributeValues.addElement(token.StringValue);
						}

                        tokenizer.NextToken(out token);
					}
					if (attributeValues.size() == 0)
					{
						errms(tokenizer, "no nominal values found");
					}
					m_Attributes.addElement(new Attribute(attributeName, attributeValues, numAttributes()));
				}
				getLastToken(tokenizer,out token, false);
				getFirstToken(tokenizer,out token);
				//if (tokenizer.ttype == SupportClass.StreamTokenizerSupport.TT_EOF)
                if (token is EofToken)
					errms(tokenizer, "premature end of file");
			}
			
			// Check if data part follows. We can't easily check for EOL.
			if (!ARFF_DATA.ToUpper().Equals(token.StringValue.ToUpper()))
			{
				errms(tokenizer, "keyword " + ARFF_DATA + " expected");
			}
			
			// Check if any attributes have been declared.
			if (m_Attributes.size() == 0)
			{
				errms(tokenizer, "no attributes declared");
			}
			
			// Allocate buffers in case sparse instances have to be read
			m_ValueBuffer = new double[numAttributes()];
			m_IndicesBuffer = new int[numAttributes()];
            
            
		}
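For reference, a small helper (not part of the original listing) that produces the kind of ARFF header readHeader above consumes: a @relation line, one @attribute declaration per line (numeric, string, date, or a {...} enumeration of nominal values), followed by the @data keyword. Wrapping the reader in a StreamTokenizer and calling readHeader is left to the surrounding Instances class, which is assumed here.

		private static System.IO.StreamReader SampleArffHeader()
		{
			string header =
				"@relation weather\n" +
				"@attribute temperature numeric\n" +
				"@attribute description string\n" +
				"@attribute outlook {sunny, overcast, rainy}\n" +
				"@data\n";
			// A byte stream stands in for a file; any StreamReader-backed source works the same way.
			return new System.IO.StreamReader(
				new System.IO.MemoryStream(System.Text.Encoding.UTF8.GetBytes(header)));
		}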
Example #8
		/// <summary> Read a matrix from a stream.  The format is the same as that used by the print method,
		/// so printed matrices can be read back in (provided they were printed using
		/// US Locale).  Elements are separated by
		/// whitespace, all the elements for each row appear on a single line,
		/// and the last row is followed by a blank line.
		/// <p/>
		/// Note: This format differs from the one that can be read via the
		/// Matrix(Reader) constructor! For that format, the write(Writer) method
		/// is used (from the original weka.core.Matrix class).
		/// 
		/// </summary>
		/// <param name="input">the input stream.
		/// </param>
		/// <seealso cref="Matrix(Reader)">
		/// </seealso>
		/// <seealso cref="write(Writer)">
		/// </seealso>
		public static Matrix read(System.IO.StreamReader input)
		{
			StreamTokenizer tokenizer = new StreamTokenizer(input);
			
			// Although StreamTokenizer will parse numbers, it doesn't recognize
			// scientific notation (E or D); however, Double.valueOf does.
			// The strategy here is to disable StreamTokenizer's number parsing.
			// We'll only get whitespace delimited words, EOL's and EOF's.
			// These words should all be numbers, for Double.valueOf to parse.

            tokenizer.Settings.SetDefaults();//.ResetSyntax();
			tokenizer.Settings.WordChars(0, 255);
			tokenizer.Settings.WhitespaceChars(0,(int) ' ');//  .WhitespaceChars(0, ' ');
			tokenizer.Settings.GrabEol=true;
			System.Collections.ArrayList v = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
			
			// Ignore initial empty lines
            Token token;
            tokenizer.NextToken(out token);
			// Skip leading empty lines, fetching a new token each pass so the loop terminates.
			while (token is EolToken)
				tokenizer.NextToken(out token);
			//if (token.ttype == SupportClass.StreamTokenizerSupport.TT_EOF)
            if (token is EofToken)
				throw new System.IO.IOException("Unexpected EOF on matrix read.");
            do
            {
               // v.Add(System.Double.Parse(tokenizer.sval)); // Read & store 1st row.
                v.Add(System.Double.Parse(token.StringValue)); // Read & store 1st row.
                tokenizer.NextToken(out token);
            }
            while (token is WordToken);
			//while (tokenizer.NextToken() == SupportClass.StreamTokenizerSupport.TT_WORD);
			
			int n = v.Count; // Now we've got the number of columns!
			double[] row = new double[n];
			// Extract the elements of the 1st row.
			for (int j = 0; j < n; j++)
				row[j] = ((System.Double) v[j]);
			v.Clear();
			v.Add(row); // Start storing rows instead of columns.
            tokenizer.NextToken(out token);
            while (token is WordToken)
			//while (tokenizer.NextToken() == SupportClass.StreamTokenizerSupport.TT_WORD)
			{
				// While non-empty lines
				v.Add(row = new double[n]);
				int j = 0;
				do 
				{
					if (j >= n)
						throw new System.IO.IOException("Row " + v.Count + " is too long.");
					//row[j++] = System.Double.Parse(tokenizer.sval);
                    row[j++] = System.Double.Parse(token.StringValue);
                    tokenizer.NextToken(out token);
				}
                while (token is WordToken);
              
				//while (tokenizer.NextToken() == SupportClass.StreamTokenizerSupport.TT_WORD);
				if (j < n)
					throw new System.IO.IOException("Row " + v.Count + " is too short.");

				// Advance past the end-of-line token so the loop condition sees the first
				// token of the next row; a blank line or EOF ends the matrix.
				tokenizer.NextToken(out token);
			}
			int m = v.Count; // Now we've got the number of rows.
			double[][] A = new double[m][];
			v.CopyTo(A); // copy the rows out of the vector
			return new Matrix(A);
		}
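A minimal usage sketch (not part of the original listing) for the read method above, showing the documented layout: elements separated by whitespace, all elements of a row on one line, and a blank line after the last row.

		public static Matrix ReadExample()
		{
			string text =
				"1.0 2.0 3.0\n" +
				"4.0 5.0 6.0\n" +
				"\n";   // the blank line marks the end of the matrix
			using (System.IO.StreamReader input = new System.IO.StreamReader(
				new System.IO.MemoryStream(System.Text.Encoding.UTF8.GetBytes(text))))
			{
				return Matrix.read(input);   // a 2 x 3 matrix
			}
		}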