/// <summary>
/// Initializes the <c>dataSet</c> member (declared outside this
/// constructor) by encoding a small in-memory, comma-delimited
/// data set whose header line names the columns COLOR and NUMBER.
/// </summary>
static TestableMultipleCorrespondence00()
{
    // Create a data stream.
    string[] data =
    {
        "COLOR,NUMBER",
        "Red,Negative",
        "Green,Zero",
        "Red,Negative",
        "Blue,Negative",
        "Blue,Positive"
    };

    // The stream, the writer and the reader are all IDisposable:
    // using declarations release them when the constructor exits,
    // that is, only after the encoding below has completed.
    using MemoryStream stream = new();
    using StreamWriter writer = new(stream);
    foreach (string line in data)
    {
        writer.WriteLine(line);
    }

    // One flush after the loop pushes every buffered line
    // into the memory stream before it is rewound for reading.
    writer.Flush();
    stream.Position = 0;

    // Encode the categorical data set.
    using StreamReader streamReader = new(stream);
    char columnDelimiter = ',';
    IndexCollection extractedColumns = IndexCollection.Range(0, 1);
    bool firstLineContainsColumnHeaders = true;

    dataSet = CategoricalDataSet.Encode(
        streamReader,
        columnDelimiter,
        extractedColumns,
        firstLineContainsColumnHeaders);
}
/// <summary>
/// Encodes a small in-memory, comma-delimited data set having
/// columns COLOR and NUMBER, then decodes it and writes the
/// variable names and the decoded rows to the console.
/// </summary>
public void Main()
{
    // Create a data stream.
    string[] data =
    {
        "COLOR,NUMBER",
        "Red,Negative",
        "Green,Zero",
        "Red,Negative",
        "Black,Negative",
        "Black,Positive"
    };

    // The stream, the writer and the reader are all IDisposable:
    // using declarations release them when the method exits.
    using MemoryStream stream = new();
    using StreamWriter writer = new(stream);
    foreach (string line in data)
    {
        writer.WriteLine(line);
    }

    // One flush after the loop pushes every buffered line
    // into the memory stream before it is rewound for reading.
    writer.Flush();
    stream.Position = 0;

    // Encode the categorical data set.
    using StreamReader streamReader = new(stream);
    char columnDelimiter = ',';
    IndexCollection extractedColumns = IndexCollection.Range(0, 1);
    bool firstLineContainsColumnHeaders = true;

    CategoricalDataSet dataset = CategoricalDataSet.Encode(
        streamReader,
        columnDelimiter,
        extractedColumns,
        firstLineContainsColumnHeaders);

    // Decode and show the data set.
    Console.WriteLine("Decoded data set:");
    Console.WriteLine();

    var decodedDataSet = dataset.Decode();
    int numberOfInstances = dataset.Data.NumberOfRows;
    int numberOfVariables = dataset.Data.NumberOfColumns;

    // Header line: each variable name is followed by a comma.
    foreach (var variable in dataset.Variables)
    {
        Console.Write(variable.Name + ",");
    }
    Console.WriteLine();

    // One output line per instance, each entry followed by a comma.
    for (int i = 0; i < numberOfInstances; i++)
    {
        for (int j = 0; j < numberOfVariables; j++)
        {
            Console.Write(decodedDataSet[i][j] + ",");
        }
        Console.WriteLine();
    }
}
/// <summary>
/// Encodes a small in-memory, comma-delimited data set having
/// columns COLOR and NUMBER, then requests the contingency table
/// with variable index 1 on the rows and variable index 0 on the
/// columns, and writes that table to the console.
/// </summary>
public void Main()
{
    // Create a data stream.
    string[] data =
    {
        "COLOR,NUMBER",
        "Red,Negative",
        "Green,Zero",
        "White,Positive",
        "Red,Negative",
        "Blue,Negative",
        "Blue,Positive"
    };

    // The stream, the writer and the reader are all IDisposable:
    // using declarations release them when the method exits.
    using MemoryStream stream = new();
    using StreamWriter writer = new(stream);
    foreach (string line in data)
    {
        writer.WriteLine(line);
    }

    // One flush after the loop pushes every buffered line
    // into the memory stream before it is rewound for reading.
    writer.Flush();
    stream.Position = 0;

    // Encode the categorical data set.
    using StreamReader streamReader = new(stream);
    char columnDelimiter = ',';
    IndexCollection extractedColumns = IndexCollection.Range(0, 1);
    bool firstLineContainsColumnHeaders = true;

    var dataSet = CategoricalDataSet.Encode(
        streamReader,
        columnDelimiter,
        extractedColumns,
        firstLineContainsColumnHeaders);

    // Assign the categories of variable NUMBER
    // to the rows of the table.
    int rowVariableIndex = 1;

    // Assign the categories of variable COLOR
    // to the columns of the table.
    int columnVariableIndex = 0;

    // Get the NUMBER-by-COLOR table.
    var table = dataSet.GetContingencyTable(
        rowVariableIndex,
        columnVariableIndex);

    // Show the table.
    Console.WriteLine("Contingency table:");
    Console.WriteLine(table);
}
/// <summary>
/// Builds an in-memory, comma-delimited data set having columns
/// NUMERICAL and TARGET, obtains special categorizers for column 0
/// via <c>CategoricalDataSet.CategorizeByEntropyMinimization</c>
/// (with column 1 passed as the target), encodes the data set using
/// those categorizers, then decodes it and writes the result to
/// the console.
/// </summary>
public void Main()
{
    // Create a data stream.
    const int numberOfInstances = 27;

    // The explicit length (instances plus the header line) makes the
    // compiler verify that the initializer has the expected size.
    string[] data = new string[numberOfInstances + 1]
    {
        "NUMERICAL,TARGET",
        "0,A",
        "0,A",
        "0,A",
        "1,B",
        "1,B",
        "1,B",
        "1,B",
        "2,B",
        "2,B",
        "3,C",
        "3,C",
        "3,C",
        "4,B",
        "4,B",
        "4,B",
        "4,C",
        "5,A",
        "5,A",
        "6,A",
        "7,C",
        "7,C",
        "7,C",
        "8,C",
        "8,C",
        "9,C",
        "9,C",
        "9,C"
    };

    // The stream, the writer and the reader are all IDisposable:
    // using declarations release them when the method exits.
    using MemoryStream stream = new();
    using StreamWriter writer = new(stream);
    foreach (string line in data)
    {
        writer.WriteLine(line);
    }

    // One flush after the loop pushes every buffered line
    // into the memory stream before it is rewound for reading.
    writer.Flush();
    stream.Position = 0;

    // Identify the special categorizer for variable NUMERICAL.
    using StreamReader streamReader = new(stream);
    char columnDelimiter = ',';
    IndexCollection numericalColumns = IndexCollection.Range(0, 0);
    bool firstLineContainsColumnHeaders = true;
    int targetColumn = 1;
    IFormatProvider provider = CultureInfo.InvariantCulture;

    var specialCategorizers = CategoricalDataSet.CategorizeByEntropyMinimization(
        streamReader,
        columnDelimiter,
        numericalColumns,
        firstLineContainsColumnHeaders,
        targetColumn,
        provider);

    // Encode the categorical data set using the special categorizer.
    // The same reader is reused for a second pass: after rewinding the
    // underlying stream, the reader's internal buffer must be discarded
    // so that it is resynchronized with the new stream position.
    stream.Position = 0;
    streamReader.DiscardBufferedData();

    IndexCollection extractedColumns = IndexCollection.Range(0, 1);

    CategoricalDataSet dataset = CategoricalDataSet.Encode(
        streamReader,
        columnDelimiter,
        extractedColumns,
        firstLineContainsColumnHeaders,
        specialCategorizers,
        provider);

    // Decode and show the data set.
    Console.WriteLine("Decoded data set:");
    Console.WriteLine();

    var decodedDataSet = dataset.Decode();
    int numberOfVariables = dataset.Data.NumberOfColumns;

    // Header line: each variable name is followed by a comma.
    foreach (var variable in dataset.Variables)
    {
        Console.Write(variable.Name + ",");
    }
    Console.WriteLine();

    // One output line per instance, each entry followed by a comma.
    for (int i = 0; i < numberOfInstances; i++)
    {
        for (int j = 0; j < numberOfVariables; j++)
        {
            Console.Write(decodedDataSet[i][j] + ",");
        }
        Console.WriteLine();
    }
}
public void FromEncodedDataTest() { // variables is null { List <CategoricalVariable> variables = null; DoubleMatrix data = DoubleMatrix.Identity(3); ArgumentExceptionAssert.Throw( () => { var actual = CategoricalDataSet.FromEncodedData( variables: variables, data: data); }, expectedType: typeof(ArgumentNullException), expectedPartialMessage: ArgumentExceptionAssert.NullPartialMessage, expectedParameterName: "variables"); } // data is null { List <CategoricalVariable> variables = new(); DoubleMatrix data = null; ArgumentExceptionAssert.Throw( () => { var actual = CategoricalDataSet.FromEncodedData( variables: variables, data: data); }, expectedType: typeof(ArgumentNullException), expectedPartialMessage: ArgumentExceptionAssert.NullPartialMessage, expectedParameterName: "data"); } // variables count unequal to data number of columns { List <CategoricalVariable> variables = new() { new CategoricalVariable("var0"), new CategoricalVariable("var1") }; DoubleMatrix data = DoubleMatrix.Dense(6, 3); ArgumentExceptionAssert.Throw( () => { var actual = CategoricalDataSet.FromEncodedData( variables: variables, data: data); }, expectedType: typeof(ArgumentOutOfRangeException), expectedPartialMessage: String.Format( ImplementationServices.GetResourceString( "STR_EXCEPT_CAT_COLUMNS_NOT_EQUAL_TO_VARIABLES_COUNT"), "variables"), expectedParameterName: "data"); } // category not included in variable { List <CategoricalVariable> variables = new() { new CategoricalVariable("var0"), new CategoricalVariable("var1") }; variables[0].Add(0.0); variables[1].Add(1.0); DoubleMatrix data = DoubleMatrix.Dense(1, 2); ArgumentExceptionAssert.Throw( () => { var actual = CategoricalDataSet.FromEncodedData( variables: variables, data: data); }, expectedType: typeof(ArgumentException), expectedPartialMessage: ImplementationServices.GetResourceString( "STR_EXCEPT_CAT_MATRIX_ENTRY_NOT_IN_VARIABLE_LIST"), expectedParameterName: "data"); } // Valid input { // Create a data stream string[] data = new 
string[6] { "COLOR,NUMBER", "Red, -2.2", "Green, 0.0", "Red, -3.3", "Black,-1.1", "Black, 4.4" }; MemoryStream stream = new(); StreamWriter writer = new(stream); for (int i = 0; i < data.Length; i++) { writer.WriteLine(data[i].ToCharArray()); writer.Flush(); } stream.Position = 0; // Define a special categorizer for variable NUMBER string numberCategorizer(string token, IFormatProvider provider) { double datum = Convert.ToDouble(token, provider); if (datum == 0) { return("Zero"); } else if (datum < 0) { return("Negative"); } else { return("Positive"); } } // Attach the special categorizer to variable NUMBER int numberColumnIndex = 1; var specialCategorizers = new Dictionary <int, Categorizer> { { numberColumnIndex, numberCategorizer } }; // Encode the categorical data set StreamReader streamReader = new(stream); char columnDelimiter = ','; IndexCollection extractedColumns = IndexCollection.Range(0, 1); bool firstLineContainsColumnHeaders = true; CategoricalDataSet actual = CategoricalDataSet.Encode( streamReader, columnDelimiter, extractedColumns, firstLineContainsColumnHeaders, specialCategorizers, CultureInfo.InvariantCulture); CategoricalVariable color = new("COLOR") { { 0, "Red" },