public NOMIS2011Census()
        {
            //weights passed to SetGeometryHint: the 2011 geometries are favoured over the older ones
            const float favoured2011Weight  = 2.0f;
            const float olderGeometryWeight = 0.1f;

            //Load the dataset catalogue. The catalogue columns needed for processing are the ones registered
            //with the schema further down (TableName, Description and oaurl).
            CSVCatalogue catalogueReader = new CSVCatalogue();
            Catalogue = catalogueReader.ReadCatalogue(Path.Combine(DataRootDir, CatalogueFile));

            //Filter on the "DATA.CSV" pattern. This used to be FileFilterEnum.Top with an empty pattern, but
            //that had to change to prevent the CODE0.CSV file being returned instead.
            FileFilterOptions = new FileFilter(FileFilterEnum.Pattern, "DATA.CSV");

            //geometry hints, one 2011/older pair per geometry level
            SetGeometryHint("OA_2011", favoured2011Weight);
            SetGeometryHint("OA", olderGeometryWeight);
            SetGeometryHint("LSOA_2011", favoured2011Weight);
            SetGeometryHint("LSOA", olderGeometryWeight);
            SetGeometryHint("MSOA_2011", favoured2011Weight);
            SetGeometryHint("MSOA", olderGeometryWeight);

            //Schema describing what the catalogue columns are. The catalogue holds two links to data:
            //"oaurl" (oa/lsoa/msoa) and "wardurl" (wards). Only "oaurl" is registered as the link field.
            Schema = new DatastoreSchema();
            Schema.AddField("TableName", SemanticFieldType.UniqueKey);
            Schema.AddField("Description", SemanticFieldType.Title);
            Schema.AddField("oaurl", SemanticFieldType.Link);

            //Full DataTable of everything in the variable lookup file, one row per variable, e.g.
            //  ColumnVariableCode,ColumnVariableMeasurementUnit,ColumnVariableStatisticalUnit,ColumnVariableDescription
            //  KS101EW0001,Count,Person,All categories: Sex
            //  KS101EW0002,Count,Person,Males
            //It is used for the short and long description text and is keyed on the variable code. An earlier
            //code -> description dictionary built from the same file duplicated this table and is no longer built.
            CSVCatalogue variablesReader = new CSVCatalogue();
            VariableMetaData = variablesReader.ReadCatalogue(Path.Combine(DataRootDir, VariablesFile));

            DataColumn variableCodeColumn = VariableMetaData.Columns["ColumnVariableCode"];
            VariableMetaData.PrimaryKey = new DataColumn[] { variableCodeColumn };
        }
        /// <summary>
        /// Builds a mapping between the dataset index and a plain text name containing the column code, the dataset
        /// table title and the variable description. The index is the zero based position of the data row in
        /// mapindex.csv (the header row is not counted). Most of this code comes from AnalyseCorrelationData.
        /// </summary>
        /// <returns>Dictionary keyed on dataset index whose values have the form "column title description"</returns>
        public Dictionary <int, string> GetDescriptionForIndex()
        {
            //unique dataset field code => plain text description
            //File layout:
            //  ColumnVariableCode,ColumnVariableMeasurementUnit,ColumnVariableStatisticalUnit,ColumnVariableDescription
            //  KS101EW0001,Count,Person,All categories: Sex
            //  KS101EW0002,Count,Person,Males
            Dictionary <string, string> descriptionByCode = new Dictionary <string, string>();
            string variablesPath = Path.Combine(DataRootDir, "NOMIS2011Variables.txt");

            using (TextReader variablesReader = File.OpenText(variablesPath))
            {
                variablesReader.ReadLine(); //header row is discarded
                for (string row = variablesReader.ReadLine(); row != null; row = variablesReader.ReadLine())
                {
                    //a full CSV parse is needed because the final column is quoted
                    string[] cells = CSVCatalogue.ParseCSVLine(row);
                    descriptionByCode.Add(cells[0], cells[3]);
                }
            }

            //"major-minor" index => column code and table title. Neither dictionary is read back afterwards,
            //but they are kept: Add still throws if the same major/minor pair appears twice in the file.
            Dictionary <string, string> columnByMapKey = new Dictionary <string, string>();
            Dictionary <string, string> titleByMapKey  = new Dictionary <string, string>();

            Dictionary <int, string> descriptions = new Dictionary <int, string>();
            string mapIndexPath = Path.Combine(ImageDirectory, "mapindex.csv");

            using (TextReader mapIndexReader = File.OpenText(mapIndexPath))
            {
                //File layout:
                //  major_index,minor_index,data_uri,uniquekey,title,column
                //  0,0,"file:///c:/richard/wxtemp/Datastores/ks101ew_2011_oa/ks101ew_2011oa/KS101EWDATA_LSOA.csv","KS101EW","Usual Resident Population","KS101EW0001"
                mapIndexReader.ReadLine(); //header row is discarded
                int datasetIndex = 0;
                for (string row = mapIndexReader.ReadLine(); row != null; row = mapIndexReader.ReadLine())
                {
                    string[] cells  = CSVCatalogue.ParseCSVLine(row);
                    string   mapKey = cells[0] + "-" + cells[1];
                    string   column = cells[5];
                    string   title  = cells[4];

                    columnByMapKey.Add(mapKey, column);
                    titleByMapKey.Add(mapKey, title);

                    //the description lookup throws if the column code is missing from the variables file
                    descriptions.Add(datasetIndex, column + " " + title + " " + descriptionByCode[column]);
                    datasetIndex++;
                }
            }

            return descriptions;
        }
// Example #3
// 0
        //constructor?

        /// <summary>
        /// Reads the catalogue file from the data root directory and registers the catalogue columns
        /// (TITLE, LONGDESC and CSV_URL) that are required for processing.
        /// </summary>
        public LondonDatastore()
        {
            CSVCatalogue catalogueReader = new CSVCatalogue();
            Catalogue = catalogueReader.ReadCatalogue(Path.Combine(DataRootDir, CatalogueFile));

            //Schema describing what the catalogue columns are: title, long description and link to the CSV data.
            //No tags field is registered for this datastore.
            Schema = new DatastoreSchema();
            Schema.AddField("TITLE", SemanticFieldType.Title);
            Schema.AddField("LONGDESC", SemanticFieldType.Description);
            Schema.AddField("CSV_URL", SemanticFieldType.Link);
        }
        /// <summary>
        /// Load the NOMIS variables file, mapindex.csv file and imatch-sorted.csv file and write out plain text descriptions of everything that we think matches.
        /// Every match is written to the debug output as: value,IColumn,JColumn,"(ITable) IText AND (JTable) JText".
        /// Match values are parsed and written with the invariant culture, so the result does not depend on the
        /// regional settings of the machine running the analysis.
        /// TODO: need some sort of datastore neutral way of doing this for everything, not just NOMIS
        /// </summary>
        public void AnalyseCorrelationData()
        {
            //load mapping between unique dataset field code and plain text description into hash
            Dictionary <string, string> variables = new Dictionary <string, string>();

            using (TextReader varsFile = File.OpenText(Path.Combine(DataRootDir, "NOMIS2011Variables.txt")))
            {
                string line = varsFile.ReadLine(); //skip header
                while ((line = varsFile.ReadLine()) != null)
                {
                    string[] fields = CSVCatalogue.ParseCSVLine(line); //need to do this for the quoted final column
                    //ColumnVariableCode,ColumnVariableMeasurementUnit,ColumnVariableStatisticalUnit,ColumnVariableDescription
                    //KS101EW0001,Count,Person,All categories: Sex
                    //KS101EW0002,Count,Person,Males
                    variables.Add(fields[0], fields[3]);
                }
            } //leaving the using block disposes the reader, so no explicit Close() is needed

            //load mapping between major/minor index and the unique column code
            Dictionary <string, string> indexToFieldName = new Dictionary <string, string>();
            Dictionary <string, string> indexToTableName = new Dictionary <string, string>();

            using (TextReader mapIndexFile = File.OpenText(Path.Combine(ImageDirectory, "mapindex.csv")))
            {
                string line = mapIndexFile.ReadLine(); //skip header
                while ((line = mapIndexFile.ReadLine()) != null)
                {
                    //major_index,minor_index,data_uri,uniquekey,title,column
                    //0,0,"file:///c:/richard/wxtemp/Datastores/ks101ew_2011_oa/ks101ew_2011oa/KS101EWDATA_LSOA.csv","KS101EW","Usual Resident Population","KS101EW0001"
                    string[] fields = CSVCatalogue.ParseCSVLine(line);
                    string   key    = fields[0] + "-" + fields[1];
                    indexToFieldName.Add(key, fields[5]);
                    indexToTableName.Add(key, fields[4]);
                }
            }

            //now read the data and write out plain text descriptions of the matches that are found
            using (TextReader matchFile = File.OpenText(Path.Combine(ImageDirectory, "GreenMatch\\imatch-sorted.csv")))
            {
                //imajor, iminor, jmajor, jminor, value (would have i,j and two filenames, but had to remove them as the csvfix sort required too much memory)
                //0,1,10,1,5.90647686550526
                //NOTE: this file has no header row, so the first line is data
                string line;
                while ((line = matchFile.ReadLine()) != null)
                {
                    string[] fields = CSVCatalogue.ParseCSVLine(line);

                    //The file always uses '.' as the decimal separator, so parse with the invariant culture. With the
                    //current culture, a locale that treats '.' as a group separator reads 5.906... as a huge number
                    //and the cut-off below would stop on the very first row.
                    float value = Convert.ToSingle(fields[4], System.Globalization.CultureInfo.InvariantCulture);
                    if (value > 20.0f)
                    {
                        break;                //it's a sorted list and 20 is just about on the first knee of the curve
                    }

                    string i = fields[0] + "-" + fields[1];
                    string j = fields[2] + "-" + fields[3];
                    if (i != j)                               //filter out everything matching itself
                    {
                        string iTable  = indexToTableName[i]; //get the names of the tables where the data comes from using the major/minor indexes
                        string jTable  = indexToTableName[j];
                        string iColumn = indexToFieldName[i]; //get unique column codes from major/minor map numbers
                        string jColumn = indexToFieldName[j];
                        string iText   = variables[iColumn];  //use the two unique column codes to lookup the text descriptions
                        string jText   = variables[jColumn];

                        //the output is comma separated, so the value must not be written with a decimal comma
                        string valueText = value.ToString(System.Globalization.CultureInfo.InvariantCulture);
                        System.Diagnostics.Debug.WriteLine(valueText + "," + iColumn + "," + jColumn + ",\"(" + iTable + ") " + iText + " AND (" + jTable + ") " + jText + "\"");
                    }
                }
            }
        }
// Example #5
// 0
        /// <summary>
        /// Reads the datasets catalogue and the resources catalogue from the data root directory and registers the
        /// schema fields (Title, Description, URL2 and Tags) used for processing.
        /// </summary>
        public GovDatastore()
        {
            //this catalogue file only uses a CR, so the reader's line endings are overridden before anything is read
            CSVCatalogue csvReader = new CSVCatalogue { LineEndings = "\r" };

            Catalogue   = csvReader.ReadCatalogue(Path.Combine(DataRootDir, CatalogueFile));
            ResourcesDT = csvReader.ReadCatalogue(Path.Combine(DataRootDir, CatResourcesFile));

            //Column layout, as of 4 April 2016:
            //  datasets:  Name,Title,URL,Organization,Top level organisation,License,Published,NII,Location,Import source,Author,Geographic Coverage,Isopen,License,License Id,Maintainer,Mandate,Metadata Created,Metadata Modified,Notes,Odi Certificate,ODI Certificate URL,Tags,Temporal Coverage From,Temporal Coverage To,Primary Theme,Secondary Themes,Update Frequency,Version
            //  resources: Dataset Name,URL,Format,Description,Resource ID,Position,Date,Organization,Top level organization
            //The two tables join on datasets "Name" = resources "Dataset Name".
            //
            //TODO: none of the join works yet. The idea was to make "Dataset Name" the primary key of the resources
            //table, add URL2 (the datasets file already has a URL column), Format, Description, Resource ID, Position
            //and Date columns to the catalogue, then copy those values across for every catalogue row, logging any
            //dataset with no resource row. This doesn't work as the dataset name in the resources file isn't unique -
            //it contains multiple entries for all the resources attached to a dataset. This means that you're going to
            //have to handle two tables and merge the descriptions together somehow.
            //
            //Also note: the 2012 data had resource-0-format (CSV, also look at RDF etc), plus bbox-east-long,
            //bbox-north-lat, bbox-south-lat, bbox-west-long, spatial-reference-system and spatial, which contains a
            //polygon box.

            //Schema describing what the columns are.
            //The 2012 schema used: title (Title), notes_rendered (Description), resource-0-url (Link), tags (Tags).
            //The 2016 schema below could use Notes for the description and URL for the link from the datasets file,
            //but it registers the resource Description and URL2 columns instead.
            //NOTE(review): no active code in this constructor adds "Description" or "URL2" to the catalogue - confirm
            //they get created elsewhere, otherwise these two fields refer to columns that don't exist.
            Schema = new DatastoreSchema();
            Schema.AddField("Title", SemanticFieldType.Title);
            Schema.AddField("Description", SemanticFieldType.Description);
            Schema.AddField("URL2", SemanticFieldType.Link);
            Schema.AddField("Tags", SemanticFieldType.Tags);
        }