/// <summary>
/// Orders the set by the numeric value of each entry, slices the ordered entries into
/// consecutive equally sized groups and pushes every group to its own numbered path.
/// </summary>
/// <remarks>
/// The group size is derived from a target of 100 groups, or 10 groups when the set has
/// fewer than 100 entries. Groups are pushed in ascending value order to
/// <c>filePath/1</c>, <c>filePath/2</c>, and so on.
/// </remarks>
/// <param name="inputSet">Id-to-value map; every value is parsed with <c>double.Parse</c>.</param>
/// <param name="filePath">Path prefix under which the numbered groups are pushed.</param>
/// <param name="pusher">Receives each group through its <c>push</c> method.</param>
public static void createPercentiles(Dictionary<int, string> inputSet, string filePath, BlobPusher pusher)
{
    decimal bucketTarget = inputSet.Count < 100 ? 10m : 100m;
    int bucketSize = Convert.ToInt32(Math.Ceiling((decimal)inputSet.Count / bucketTarget));

    // Pair every entry with its rank in the sorted sequence; rank / bucketSize is the bucket number.
    List<Dictionary<int, string>> buckets = inputSet
        .OrderBy(entry => double.Parse(entry.Value))
        .Select((entry, rank) => new { Entry = entry, Rank = rank })
        .GroupBy(ranked => ranked.Rank / bucketSize)
        .Select(chunk => chunk.ToDictionary(ranked => ranked.Entry.Key, ranked => ranked.Entry.Value))
        .ToList();

    Console.WriteLine(filePath);

    // Path suffixes are 1-based; after the loop the ordinal is one past the last suffix used.
    int ordinal = 1;
    for (; ordinal <= buckets.Count; ordinal++)
    {
        pusher.push(buckets[ordinal - 1], filePath + "/" + ordinal);
    }

    Console.WriteLine("COUNTER VALUE" + ordinal);
}
/// <summary>
/// Splits the set into one subset per distinct value and pushes each subset to
/// <c>filePath/&lt;value&gt;</c>.
/// </summary>
/// <param name="inputSet">Id-to-value map to partition by value.</param>
/// <param name="filePath">Path prefix; the shared value of each subset is appended after a slash.</param>
/// <param name="pusher">Receives each subset through its <c>push</c> method.</param>
public static void createDiscreteSets(Dictionary<int, string> inputSet, string filePath, BlobPusher pusher)
{
    // Build every subset first, then push them in the order the values were first seen.
    List<Dictionary<int, string>> subsets = new List<Dictionary<int, string>>();
    foreach (IGrouping<string, KeyValuePair<int, string>> sameValue in inputSet.GroupBy(entry => entry.Value))
    {
        Dictionary<int, string> subset = new Dictionary<int, string>();
        foreach (KeyValuePair<int, string> entry in sameValue)
        {
            subset.Add(entry.Key, entry.Value);
        }
        subsets.Add(subset);
    }

    foreach (Dictionary<int, string> subset in subsets)
    {
        // All entries of a subset share one value, so the first entry names the target path.
        string label = subset.First().Value;
        pusher.push(subset, filePath + "/" + label);
    }
}
/// <summary>
/// Builds one id-to-value set for every non-primary-key column of every table in the
/// connection's database, and either collects the sets or uploads them.
/// </summary>
/// <remarks>
/// For each table the column list is read with <c>show columns</c>: the column flagged
/// <c>PRI</c> supplies the ids, every other column yields one set. Each set's path
/// <c>Sets/&lt;table&gt;/&lt;column&gt;</c> is registered in <c>setMappings</c> under the key
/// <c>&lt;table&gt;-&lt;column&gt;</c>. <c>setMappings</c> and <c>blobPusher</c> are members
/// declared outside this method. A failure while querying or processing a single column is
/// written to the console and does not stop the remaining columns. Tables without a
/// <c>PRI</c> column are skipped.
/// </remarks>
/// <param name="conn">Database wrapper supplying the database name, table list and MySQL connection.</param>
/// <param name="testing">
/// When true, every set is added to the returned list and nothing is pushed. When false,
/// every set is passed to <c>blobPusher.push</c> and <c>DecisionTree.parse</c> instead.
/// </param>
/// <returns>The collected sets when <paramref name="testing"/> is true; otherwise an empty list.</returns>
public static List<Dictionary<int, string>> GenerateSets(DBConnection conn, bool testing = false)
{
    conn.ExecuteNonQuery("USE " + conn.GetDatabaseName());

    List<Dictionary<int, string>> outputList = new List<Dictionary<int, string>>();
    List<String> tables = conn.GetTables();
    // NOTE(review): never used below; kept only in case the constructor has side effects - confirm and remove.
    Navigator.Navigator navigator = new Navigator.Navigator();
    MySqlConnection connection = conn.GetConnection();

    foreach (String table in tables)
    {
        // Split the table's columns into the primary key and the remaining value columns.
        List<String> columns = new List<String>();
        string primaryKey = String.Empty;

        // "show columns" rows are read by position: 0 is the column name, 3 is the key flag.
        // The using blocks release the reader even if reading a row throws, so the
        // connection is never left with a dangling open reader.
        using (MySqlCommand cmd = new MySqlCommand("show columns from " + table, connection))
        using (MySqlDataReader dataReader = cmd.ExecuteReader())
        {
            while (dataReader.Read())
            {
                if (dataReader.GetString(3).Equals("PRI"))
                {
                    primaryKey = dataReader.GetString(0);
                }
                else
                {
                    columns.Add(dataReader.GetString(0));
                }
            }
        }

        // Without a primary key every per-column query would be invalid SQL (select ``, ...),
        // so skip the table once instead of failing once per column.
        if (primaryKey.Length == 0)
        {
            Console.WriteLine("Skipping " + table + ": no primary key column found");
            continue;
        }

        // Pair the primary key with each remaining column, one query per column.
        foreach (String column in columns)
        {
            string statement = "select `" + primaryKey + "`, `" + column + "` from " + table + ";";
            Console.WriteLine(statement);

            // Raise the network timeouts to something huge before the long-running read.
            using (MySqlCommand setTimers = new MySqlCommand("set net_write_timeout=99999; set net_read_timeout=99999", connection))
            {
                setTimers.ExecuteNonQuery();
            }

            try
            {
                string path = "Sets/" + table + "/" + column;
                Dictionary<int, string> outputDictionary = new Dictionary<int, string>();

                using (MySqlCommand command = new MySqlCommand(statement, connection))
                {
                    // Intended to lift the per-command time limit - see the connector's CommandTimeout docs.
                    command.CommandTimeout = 0;

                    using (MySqlDataReader columnReader = command.ExecuteReader())
                    {
                        // Registered only once the query has started successfully.
                        setMappings.Add(table + "-" + column, path);

                        while (columnReader.Read())
                        {
                            outputDictionary.Add(columnReader.GetInt32(0), columnReader.GetString(1));
                        }
                    }
                }

                // The reader is closed at this point, so the connection is free during the upload.
                if (testing)
                {
                    outputList.Add(outputDictionary);
                }
                else
                {
                    // Upload the raw set and derive its sub-sets in parallel; wait for both.
                    Task[] tasks = new Task[2];
                    tasks[0] = Task.Factory.StartNew(() => blobPusher.push(outputDictionary, path));
                    tasks[1] = Task.Factory.StartNew(() => DecisionTree.parse(outputDictionary, path, blobPusher));
                    Task.WaitAll(tasks);
                }
            }
            catch (Exception e)
            {
                // Best effort: report the failure and carry on with the next column.
                Console.WriteLine(e);
            }
        }
    }

    return outputList;
}