/// <summary>
/// Sets up the NOMIS 2011 Census datastore: reads the catalogue CSV, restricts file selection to
/// the DATA.CSV pattern, registers geometry hints that favour the 2011 geometries, declares the
/// catalogue schema and loads the per-variable metadata table.
/// </summary>
public NOMIS2011Census()
{
    // The field names needed from the NOMIS website catalogue page are declared through the Schema
    // further down (this replaces the old TitleField = "Description" / LinkField = "oaurl" settings;
    // the catalogue has no tags field and no description field).
    var catalogueReader = new CSVCatalogue();
    string cataloguePath = Path.Combine(DataRootDir, CatalogueFile);
    this.Catalogue = catalogueReader.ReadCatalogue(cataloguePath);

    // Select the data file by pattern. The earlier FileFilterEnum.Top filter (with an empty pattern)
    // had to be replaced because it returned the CODE0.CSV file instead.
    FileFilterOptions = new FileFilter(FileFilterEnum.Pattern, "DATA.CSV");

    // Weight the geometries so that 2011 datasets are favoured over the older ones.
    // Hints are registered pairwise (2011 name first, then the older name) for OA, LSOA and MSOA.
    string[] favouredGeometries = { "OA_2011", "LSOA_2011", "MSOA_2011" };
    string[] olderGeometries = { "OA", "LSOA", "MSOA" };
    for (int i = 0; i < favouredGeometries.Length; i++)
    {
        SetGeometryHint(favouredGeometries[i], 2.0f);
        SetGeometryHint(olderGeometries[i], 0.1f);
    }

    // Schema describing what the catalogue columns are.
    Schema = new DatastoreSchema();
    Schema.AddField("TableName", SemanticFieldType.UniqueKey);
    Schema.AddField("Description", SemanticFieldType.Title);
    // There are two links to data in the catalogue: oa/lsoa/msoa ("oaurl", used here) and wards
    // ("wardurl", deliberately not registered).
    Schema.AddField("oaurl", SemanticFieldType.Link);

    // Full table of metadata about each individual variable, read from the variables lookup file and
    // used for the short and long description text. The file layout is:
    //   ColumnVariableCode,ColumnVariableMeasurementUnit,ColumnVariableStatisticalUnit,ColumnVariableDescription
    //   KS101EW0001,Count,Person,All categories: Sex
    //   KS101EW0002,Count,Person,Males
    // (An earlier version also built a Dictionary<string, string> VariableNameDescriptionText mapping
    // variable code -> description from this same file, parsing each line with
    // CSVCatalogue.ParseCSVLine because the final column is quoted; that lookup duplicated this
    // table and is no longer built.)
    var variablesReader = new CSVCatalogue();
    string variablesPath = Path.Combine(DataRootDir, VariablesFile);
    VariableMetaData = variablesReader.ReadCatalogue(variablesPath);

    // Key the metadata table on the variable code column.
    DataColumn variableCodeColumn = VariableMetaData.Columns["ColumnVariableCode"];
    VariableMetaData.PrimaryKey = new[] { variableCodeColumn };
}
/// <summary>
/// Constructor: reads the London Datastore catalogue CSV and declares which of its columns hold
/// the title, the long description and the CSV link.
/// </summary>
public LondonDatastore()
{
    // Load the catalogue from the data root directory.
    var catalogueReader = new CSVCatalogue();
    string cataloguePath = Path.Combine(DataRootDir, CatalogueFile);
    this.Catalogue = catalogueReader.ReadCatalogue(cataloguePath);

    // Schema describing the columns required for processing. This replaces the old
    // TitleField = "TITLE", LinkField = "CSV_URL", DescriptionField = "LONGDESC" settings
    // (there was no tags field).
    Schema = new DatastoreSchema();
    Schema.AddField("TITLE", SemanticFieldType.Title);
    Schema.AddField("LONGDESC", SemanticFieldType.Description);
    Schema.AddField("CSV_URL", SemanticFieldType.Link);
}
/// <summary>
/// Sets up the Gov datastore: reads the datasets catalogue and the resources table (both with
/// CR-only line endings) and declares the schema for the 2016 catalogue layout.
/// </summary>
public GovDatastore()
{
    // Override the line endings because this catalogue file only uses a CR.
    // The same reader (and therefore the same line-ending setting) is used for both files.
    var csvReader = new CSVCatalogue { LineEndings = "\r" };

    string datasetsPath = Path.Combine(DataRootDir, CatalogueFile);
    this.Catalogue = csvReader.ReadCatalogue(datasetsPath);

    string resourcesPath = Path.Combine(DataRootDir, CatResourcesFile);
    this.ResourcesDT = csvReader.ReadCatalogue(resourcesPath);

    // Column layout as of 4 April 2016:
    //   datasets:  Name,Title,URL,Organization,Top level organisation,License,Published,NII,Location,
    //              Import source,Author,Geographic Coverage,Isopen,License,License Id,Maintainer,Mandate,
    //              Metadata Created,Metadata Modified,Notes,Odi Certificate,ODI Certificate URL,Tags,
    //              Temporal Coverage From,Temporal Coverage To,Primary Theme,Secondary Themes,
    //              Update Frequency,Version
    //   resources: Dataset Name,URL,Format,Description,Resource ID,Position,Date,Organization,
    //              Top level organization
    // The intended join is datasets.Name == resources."Dataset Name".
    //
    // TODO: the join is not implemented - none of it works yet. "Dataset Name" is not unique in the
    // resources file (it holds one entry for every resource attached to a dataset), so the two tables
    // have to be handled separately and their descriptions merged somehow.
    // The abandoned attempt made "Dataset Name" the primary key of the resources table, manually added
    // string columns "URL2" (the catalogue already has a "URL"), "Format", "Description", "Resource ID",
    // "Position" and "Date" to the catalogue, and then, for every catalogue row, looked up the resource
    // row by Name and copied those values across (writing a debug message when no resource was found).
    //
    // Other notes carried over from the 2012 data: resource-0-format is CSV (also look at RDF etc);
    // bbox-east-long, bbox-north-lat, bbox-south-lat, bbox-west-long, spatial-reference-system and
    // spatial contain a polygon box.
    //
    // 2012 schema, no longer used:
    //   "title" -> Title, "notes_rendered" -> Description, "resource-0-url" -> Link, "tags" -> Tags

    // 2016 schema. "Notes" and "URL" were considered for Description and Link but are not registered.
    // NOTE(review): "Description" and "URL2" are not in the datasets column list above; they would only
    // exist in the catalogue once the resources join described above is implemented - confirm how
    // these fields are resolved.
    Schema = new DatastoreSchema();
    Schema.AddField("Title", SemanticFieldType.Title);
    Schema.AddField("Description", SemanticFieldType.Description);
    Schema.AddField("URL2", SemanticFieldType.Link);
    Schema.AddField("Tags", SemanticFieldType.Tags);
}