/// <summary>
/// Build a mapping between the dataset index and a plain text name containing the dataset column code,
/// table title and variable description. The index is a zero-based counter over the data rows of
/// mapindex.csv, in file order. Most of this code comes from AnalyseCorrelationData.
/// </summary>
/// <returns>
/// Dictionary keyed on the row index, with values of the form "[column code] [table title] [variable description]".
/// </returns>
/// <exception cref="KeyNotFoundException">
/// Thrown if a column code in mapindex.csv has no entry in NOMIS2011Variables.txt.
/// </exception>
public Dictionary<int, string> GetDescriptionForIndex()
{
    Dictionary<int, string> Result = new Dictionary<int, string>();

    //load mapping between unique dataset field code and plain text description into hash
    Dictionary<string, string> variables = new Dictionary<string, string>();
    using (TextReader varsFile = File.OpenText(Path.Combine(DataRootDir, "NOMIS2011Variables.txt")))
    {
        string Line = varsFile.ReadLine(); //skip header
        while ((Line = varsFile.ReadLine()) != null)
        {
            if (Line.Trim().Length == 0)
            {
                continue; //a blank line (e.g. at the end of the file) carries no fields to index
            }
            //ColumnVariableCode,ColumnVariableMeasurementUnit,ColumnVariableStatisticalUnit,ColumnVariableDescription
            //KS101EW0001,Count,Person,All categories: Sex
            //KS101EW0002,Count,Person,Males
            string[] Fields = CSVCatalogue.ParseCSVLine(Line); //need to do this for the quoted final column
            variables.Add(Fields[0], Fields[3]);
        }
        //no explicit Close() needed: leaving the using block disposes the reader
    }

    //walk the map index file in order, numbering each data row and building its description
    using (TextReader mapIndexFile = File.OpenText(Path.Combine(ImageDirectory, "mapindex.csv")))
    {
        string Line = mapIndexFile.ReadLine(); //skip header
        int index = 0;
        while ((Line = mapIndexFile.ReadLine()) != null)
        {
            if (Line.Trim().Length == 0)
            {
                continue; //blank lines are not data rows, so they do not consume an index
            }
            //major_index,minor_index,data_uri,uniquekey,title,column
            //0,0,"file:///c:/richard/wxtemp/Datastores/ks101ew_2011_oa/ks101ew_2011oa/KS101EWDATA_LSOA.csv","KS101EW","Usual Resident Population","KS101EW0001"
            string[] Fields = CSVCatalogue.ParseCSVLine(Line);
            string TableTitle = Fields[4];
            string ColumnCode = Fields[5];

            string Description;
            if (!variables.TryGetValue(ColumnCode, out Description))
            {
                //same exception type as a failed dictionary indexer lookup, but naming the offending code
                throw new KeyNotFoundException(
                    "Column code '" + ColumnCode + "' from mapindex.csv was not found in NOMIS2011Variables.txt");
            }

            Result.Add(index, ColumnCode + " " + TableTitle + " " + Description);
            ++index;
        }
    }

    return Result;
}
/// <summary>
/// Load the NOMIS variables file, mapindex.csv file and imatch-sorted.csv file and write out plain text
/// descriptions of everything that we think matches. One line per match is written to the debug output as
/// value,IColumn,JColumn,"(ITable) IText AND (JTable) JText".
/// Numbers are parsed and written using the invariant culture, so the result does not depend on the
/// regional settings of the machine.
/// TODO: need some sort of datastore neutral way of doing this for everything, not just NOMIS
/// </summary>
/// <exception cref="KeyNotFoundException">
/// Thrown if a major/minor index pair in the match file is missing from mapindex.csv, or a column code
/// is missing from NOMIS2011Variables.txt.
/// </exception>
public void AnalyseCorrelationData()
{
    //it's a sorted list and 20 is just about on the first knee of the curve
    const float MatchThreshold = 20.0f;

    //load mapping between unique dataset field code and plain text description into hash
    Dictionary<string, string> variables = new Dictionary<string, string>();
    using (TextReader varsFile = File.OpenText(Path.Combine(DataRootDir, "NOMIS2011Variables.txt")))
    {
        string Line = varsFile.ReadLine(); //skip header
        while ((Line = varsFile.ReadLine()) != null)
        {
            if (Line.Trim().Length == 0)
            {
                continue; //a blank line (e.g. at the end of the file) carries no fields to index
            }
            //ColumnVariableCode,ColumnVariableMeasurementUnit,ColumnVariableStatisticalUnit,ColumnVariableDescription
            //KS101EW0001,Count,Person,All categories: Sex
            //KS101EW0002,Count,Person,Males
            string[] Fields = CSVCatalogue.ParseCSVLine(Line); //need to do this for the quoted final column
            variables.Add(Fields[0], Fields[3]);
        }
        //no explicit Close() needed: leaving the using block disposes the reader
    }

    //load mapping between major/minor index and the unique column code / table title
    Dictionary<string, string> indexToFieldName = new Dictionary<string, string>();
    Dictionary<string, string> indexToTableName = new Dictionary<string, string>();
    using (TextReader mapIndexFile = File.OpenText(Path.Combine(ImageDirectory, "mapindex.csv")))
    {
        string Line = mapIndexFile.ReadLine(); //skip header
        while ((Line = mapIndexFile.ReadLine()) != null)
        {
            if (Line.Trim().Length == 0)
            {
                continue;
            }
            //major_index,minor_index,data_uri,uniquekey,title,column
            //0,0,"file:///c:/richard/wxtemp/Datastores/ks101ew_2011_oa/ks101ew_2011oa/KS101EWDATA_LSOA.csv","KS101EW","Usual Resident Population","KS101EW0001"
            string[] Fields = CSVCatalogue.ParseCSVLine(Line);
            string Key = Fields[0] + "-" + Fields[1];
            indexToFieldName.Add(Key, Fields[5]);
            indexToTableName.Add(Key, Fields[4]);
        }
    }

    //now read the data and write out plain text descriptions of the matches that are found
    using (TextReader matchFile = File.OpenText(Path.Combine(ImageDirectory, "GreenMatch\\imatch-sorted.csv")))
    {
        //imajor, iminor, jmajor, jminor, value (would have i,j and two filenames, but had to remove them as the csvfix sort required too much memory)
        //0,1,10,1,5.90647686550526
        string Line;
        while ((Line = matchFile.ReadLine()) != null)
        {
            if (Line.Trim().Length == 0)
            {
                continue;
            }
            string[] Fields = CSVCatalogue.ParseCSVLine(Line);

            //the file always uses '.' as the decimal separator, so parse with the invariant culture;
            //parsing with the current culture misreads the value where '.' is a group separator
            float value = Convert.ToSingle(Fields[4], System.Globalization.CultureInfo.InvariantCulture);
            if (value > MatchThreshold)
            {
                break; //sorted ascending by value, so nothing after this point can be under the threshold
            }

            string I = Fields[0] + "-" + Fields[1];
            string J = Fields[2] + "-" + Fields[3];
            if (I == J)
            {
                continue; //filter out everything matching itself
            }

            string ITable = indexToTableName[I]; //get the names of the tables where the data comes from using the major/minor indexes
            string JTable = indexToTableName[J];
            string IColumn = indexToFieldName[I]; //get unique column codes from major/minor map numbers
            string JColumn = indexToFieldName[J];
            string IText = variables[IColumn]; //use the two unique column codes to lookup the text descriptions
            string JText = variables[JColumn];

            //format the value with the invariant culture so a decimal comma can never corrupt the comma separated output
            string ValueText = value.ToString(System.Globalization.CultureInfo.InvariantCulture);
            System.Diagnostics.Debug.WriteLine(
                ValueText + "," + IColumn + "," + JColumn
                + ",\"(" + ITable + ") " + IText + " AND (" + JTable + ") " + JText + "\"");
        }
    }
}