/// <summary>
/// Loads a cost matrix in the old format from a reader. Adapted from code
/// once sitting in Instances.java.
///
/// The old format is a sequence of lines "firstClassIndex secondClassIndex weight";
/// '%' starts a comment and diagonal entries must stay zero.
/// </summary>
/// <param name="reader">the reader to get the values from.</param>
/// <exception cref="Exception">if the matrix cannot be read correctly.</exception>
public virtual void readOldFormat(System.IO.StreamReader reader)
{
    StreamTokenizer tokenizer = new StreamTokenizer(reader);
    Token currentToken;
    double firstIndex, secondIndex, weight;

    initialize();

    // '%' introduces comments; EOL tokens are needed so premature line
    // endings can be detected below.
    tokenizer.Settings.CommentChar('%');
    tokenizer.Settings.GrabEol = true;

    tokenizer.NextToken(out currentToken);
    while (!(currentToken is EofToken))
    {
        // Skip empty lines.
        // BUGFIX: the tokenizer must be advanced before 'continue',
        // otherwise the loop spins forever on the same EOL token.
        if (currentToken is EolToken)
        {
            tokenizer.NextToken(out currentToken);
            continue;
        }

        // Get index of first class.
        if (!((currentToken is FloatToken) || (currentToken is IntToken)))
        {
            throw new System.Exception("Only numbers and comments allowed " + "in cost file!");
        }
        firstIndex = Convert.ToDouble(currentToken.StringValue);
        // BUGFIX: compare the truncated value against the original to verify
        // the index is integral; the previous double->double cast was a no-op.
        if (!Utils.eq((double) ((int) firstIndex), firstIndex))
        {
            throw new System.Exception("First number in line has to be " + "index of a class!");
        }
        if ((int) firstIndex >= size())
        {
            throw new System.Exception("Class index out of range!");
        }

        // Get index of second class.
        tokenizer.NextToken(out currentToken);
        if (currentToken is EofToken)
        {
            throw new System.Exception("Premature end of file!");
        }
        if (currentToken is EolToken)
        {
            throw new System.Exception("Premature end of line!");
        }
        if (!((currentToken is IntToken) || (currentToken is FloatToken)))
        {
            throw new System.Exception("Only numbers and comments allowed " + "in cost file!");
        }
        secondIndex = Convert.ToDouble(currentToken.StringValue);
        // BUGFIX: same integrality check as for the first index.
        if (!Utils.eq((double) ((int) secondIndex), secondIndex))
        {
            throw new System.Exception("Second number in line has to be " + "index of a class!");
        }
        if ((int) secondIndex >= size())
        {
            throw new System.Exception("Class index out of range!");
        }
        if ((int) secondIndex == (int) firstIndex)
        {
            throw new System.Exception("Diagonal of cost matrix non-zero!");
        }

        // Get cost factor.
        tokenizer.NextToken(out currentToken);
        if (currentToken is EofToken)
        {
            throw new System.Exception("Premature end of file!");
        }
        if (currentToken is EolToken)
        {
            throw new System.Exception("Premature end of line!");
        }
        if (!((currentToken is IntToken) || (currentToken is FloatToken)))
        {
            throw new System.Exception("Only numbers and comments allowed " + "in cost file!");
        }
        weight = Convert.ToDouble(currentToken.StringValue);
        if (!Utils.gr(weight, 0))
        {
            throw new System.Exception("Only positive weights allowed!");
        }
        setXmlElement((int) firstIndex, (int) secondIndex, weight);

        tokenizer.NextToken(out currentToken);
    }
}
/// <summary>
/// Gets the next token, checking for a premature end of line.
/// </summary>
/// <param name="tokenizer">the stream tokenizer</param>
/// <param name="token">receives the token that was read</param>
/// <exception cref="IOException">if it finds a premature end of line</exception>
protected internal virtual void getNextToken(StreamTokenizer tokenizer, out Token token)
{
    tokenizer.NextToken(out token);
    if (token is EolToken)
    {
        errms(tokenizer, "premature end of line");
    }
    if (token is EofToken)
    {
        errms(tokenizer, "premature end of file");
    }
    // NOTE(review): the original Java code re-tagged quoted tokens as
    // TT_WORD and the word "?" as a special type here. The ported tokenizer
    // exposes no mutable token type, so those branches had empty bodies and
    // no side effects; they have been removed as dead code.
}
/// <summary>
/// Reads and skips all tokens before the next end of line token, which is
/// pushed back so the caller still sees it.
/// </summary>
/// <param name="tokenizer">the stream tokenizer</param>
protected internal virtual void readTillEOL(StreamTokenizer tokenizer)
{
    Token token;
    tokenizer.NextToken(out token);
    // BUGFIX: also stop at EOF — without this guard the loop never
    // terminates when the stream's last line is not newline-terminated.
    while (!(token is EolToken) && !(token is EofToken))
    {
        tokenizer.NextToken(out token);
    }
    tokenizer.PushBack(token);
}
/// <summary>
/// Reads one token and verifies that it is an end of line (or, when
/// <paramref name="endOfFileOk"/> is true, an end of file).
/// </summary>
/// <param name="tokenizer">the stream tokenizer</param>
/// <param name="token">receives the token that was read</param>
/// <param name="endOfFileOk">whether an end of file is acceptable here</param>
/// <exception cref="IOException">if it doesn't find an end of line</exception>
protected internal virtual void getLastToken(StreamTokenizer tokenizer, out Token token, bool endOfFileOk)
{
    tokenizer.NextToken(out token);

    bool lineEnded = token is EolToken;
    bool acceptableFileEnd = endOfFileOk && (token is EofToken);

    if (!lineEnded && !acceptableFileEnd)
    {
        errms(tokenizer, "end of line expected");
    }
}
/// <summary>
/// Gets an index token, checking for a premature end of line or file.
/// </summary>
/// <param name="tokenizer">the stream tokenizer</param>
/// <param name="token">receives the token that was read</param>
/// <exception cref="IOException">if it finds a premature end of line</exception>
protected internal virtual void getIndex(StreamTokenizer tokenizer, out Token token)
{
    tokenizer.NextToken(out token);

    // A line or file ending here means the expected index is missing.
    if ((token is EolToken) || (token is EofToken))
    {
        string what = (token is EolToken) ? "line" : "file";
        errms(tokenizer, "premature end of " + what);
    }
}
/// <summary>
/// Gets the next token while skipping over empty lines: keeps reading
/// until the token is not an end of line.
/// </summary>
/// <param name="tokenizer">the stream tokenizer</param>
/// <param name="token">receives the first non-EOL token (may be EOF)</param>
/// <exception cref="IOException">if reading the next token fails</exception>
protected internal virtual void getFirstToken(StreamTokenizer tokenizer, out Token token)
{
    // Consume EOL tokens until something substantive (or EOF) appears.
    do
    {
        tokenizer.NextToken(out token);
    }
    while (token is EolToken);
}
/// <summary>
/// Reads and stores the header of an ARFF file: the @relation line, all
/// @attribute declarations (numeric, string, date, or nominal), and the
/// @data keyword. Also allocates the buffers used for sparse instances.
/// </summary>
/// <param name="tokenizer">the stream tokenizer</param>
/// <exception cref="IOException">if the information is not read successfully</exception>
protected internal virtual void readHeader(StreamTokenizer tokenizer)
{
    System.String attributeName;
    FastVector attributeValues;
    Token token = null;

    // Get name of relation.
    getFirstToken(tokenizer, out token);
    if (token is EofToken)
    {
        errms(tokenizer, "premature end of file");
    }
    if (ARFF_RELATION.ToUpper().Equals(token.StringValue.ToUpper()))
    {
        getNextToken(tokenizer, out token);
        m_RelationName = token.StringValue;
        getLastToken(tokenizer, out token, false);
    }
    else
    {
        errms(tokenizer, "keyword " + ARFF_RELATION + " expected");
    }

    // Create vector to hold attribute declarations temporarily.
    m_Attributes = new FastVector();

    // Get attribute declarations.
    getFirstToken(tokenizer, out token);
    if (token is EofToken)
    {
        errms(tokenizer, "premature end of file");
    }

    while (Attribute.ARFF_ATTRIBUTE.ToUpper().Equals(token.StringValue.ToUpper()))
    {
        // Get attribute name.
        getNextToken(tokenizer, out token);
        attributeName = token.StringValue;
        getNextToken(tokenizer, out token);

        // A word token means a typed attribute (real/integer/numeric,
        // string, or date); anything else starts a nominal enumeration.
        if (token is WordToken)
        {
            if (token.StringValue.ToUpper().Equals(Attribute.ARFF_ATTRIBUTE_REAL.ToUpper())
                || token.StringValue.ToUpper().Equals(Attribute.ARFF_ATTRIBUTE_INTEGER.ToUpper())
                || token.StringValue.ToUpper().Equals(Attribute.ARFF_ATTRIBUTE_NUMERIC.ToUpper()))
            {
                m_Attributes.addElement(new Attribute(attributeName, numAttributes()));
                readTillEOL(tokenizer);
            }
            else if (token.StringValue.ToUpper().Equals(Attribute.ARFF_ATTRIBUTE_STRING.ToUpper()))
            {
                m_Attributes.addElement(new Attribute(attributeName, (FastVector) null, numAttributes()));
                readTillEOL(tokenizer);
            }
            else if (token.StringValue.ToUpper().Equals(Attribute.ARFF_ATTRIBUTE_DATE.ToUpper()))
            {
                System.String format = null;
                tokenizer.NextToken(out token);
                // BUGFIX: an optional date format is present only when the
                // line does not end here. The ported code tested for EOF
                // instead of EOL, which made "@attribute d date" (no format)
                // fail with "not a valid date format".
                if (!(token is EolToken))
                {
                    // NOTE(review): the quote comparisons mirror the Java
                    // ttype checks for quoted strings — confirm the ported
                    // tokenizer represents quoted tokens this way.
                    if ((!(token is WordToken)) && (token.StringValue != "'") && (token.StringValue != "\""))
                    {
                        errms(tokenizer, "not a valid date format");
                    }
                    format = token.StringValue;
                    readTillEOL(tokenizer);
                }
                else
                {
                    tokenizer.PushBack(token);
                }
                m_Attributes.addElement(new Attribute(attributeName, format, numAttributes()));
            }
            else
            {
                errms(tokenizer, "no valid attribute type or invalid " + "enumeration");
            }
        }
        else
        {
            // Attribute is nominal: read "{v1, v2, ...}".
            attributeValues = new FastVector();
            tokenizer.PushBack(token);

            tokenizer.NextToken(out token);
            if (token.StringValue != "{")
            {
                errms(tokenizer, "{ expected at beginning of enumeration");
            }
            tokenizer.NextToken(out token);
            while (token.StringValue != "}")
            {
                if (token is EolToken)
                {
                    errms(tokenizer, "} expected at end of enumeration");
                }
                else
                {
                    attributeValues.addElement(token.StringValue);
                }
                tokenizer.NextToken(out token);
            }
            if (attributeValues.size() == 0)
            {
                errms(tokenizer, "no nominal values found");
            }
            m_Attributes.addElement(new Attribute(attributeName, attributeValues, numAttributes()));
        }
        getLastToken(tokenizer, out token, false);
        getFirstToken(tokenizer, out token);
        if (token is EofToken)
            errms(tokenizer, "premature end of file");
    }

    // Check if data part follows. We can't easily check for EOL.
    if (!ARFF_DATA.ToUpper().Equals(token.StringValue.ToUpper()))
    {
        errms(tokenizer, "keyword " + ARFF_DATA + " expected");
    }

    // Check if any attributes have been declared.
    if (m_Attributes.size() == 0)
    {
        errms(tokenizer, "no attributes declared");
    }

    // Allocate buffers in case sparse instances have to be read.
    m_ValueBuffer = new double[numAttributes()];
    m_IndicesBuffer = new int[numAttributes()];
}
/// <summary> Read a matrix from a stream. The format is the same the print method,
/// so printed matrices can be read back in (provided they were printed using
/// US Locale). XmlElements are separated by
/// whitespace, all the elements for each row appear on a single line,
/// the last row is followed by a blank line.
/// <p/>
/// Note: This format differs from the one that can be read via the
/// Matrix(Reader) constructor! For that format, the write(Writer) method
/// is used (from the original weka.core.Matrix class).
/// </summary>
/// <param name="input">the input stream.</param>
/// <seealso cref="Matrix(Reader)"></seealso>
/// <seealso cref="write(Writer)"></seealso>
public static Matrix read(System.IO.StreamReader input)
{
    StreamTokenizer tokenizer = new StreamTokenizer(input);

    // Although StreamTokenizer will parse numbers, it doesn't recognize
    // scientific notation (E or D); however, Double.Parse does. The strategy
    // here is to disable the tokenizer's number parsing: we only get
    // whitespace-delimited words, EOLs and EOFs, and every word should be a
    // number that Double.Parse can handle.
    tokenizer.Settings.SetDefaults();
    tokenizer.Settings.WordChars(0, 255);
    tokenizer.Settings.WhitespaceChars(0, (int) ' ');
    tokenizer.Settings.GrabEol = true;

    System.Collections.ArrayList v = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));

    // Ignore initial empty lines.
    // BUGFIX: the original loop had an empty body ("while (...) ;") and
    // never advanced the tokenizer, so any leading blank line hung forever.
    Token token;
    tokenizer.NextToken(out token);
    while (token is EolToken)
    {
        tokenizer.NextToken(out token);
    }
    if (token is EofToken)
        throw new System.IO.IOException("Unexpected EOF on matrix read.");

    do
    {
        // Read & store 1st row.
        v.Add(System.Double.Parse(token.StringValue));
        tokenizer.NextToken(out token);
    }
    while (token is WordToken);

    int n = v.Count; // Now we've got the number of columns!
    double[] row = new double[n];
    for (int j = 0; j < n; j++)
        // extract the elements of the 1st row.
        row[j] = ((System.Double) v[j]);

    v.Clear();
    v.Add(row); // Start storing rows instead of columns.

    tokenizer.NextToken(out token);
    while (token is WordToken)
    {
        // While non-empty lines
        v.Add(row = new double[n]);
        int j = 0;
        do
        {
            if (j >= n)
                throw new System.IO.IOException("Row " + v.Count + " is too long.");
            row[j++] = System.Double.Parse(token.StringValue);
            tokenizer.NextToken(out token);
        }
        while (token is WordToken);
        if (j < n)
            throw new System.IO.IOException("Row " + v.Count + " is too short.");
    }

    int m = v.Count; // Now we've got the number of rows.
    double[][] A = new double[m][];
    v.CopyTo(A); // copy the rows out of the vector
    return new Matrix(A);
}