Exemplo n.º 1
0
        /// <summary>
        /// Sets up the NOMIS 2011 Census datastore: reads the catalogue CSV, restricts file selection to
        /// the "DATA.CSV" pattern, registers geometry weighting hints, declares which catalogue columns
        /// carry the key, title and link, and loads the variable lookup table keyed on
        /// "ColumnVariableCode".
        /// </summary>
        public NOMIS2011Census()
        {
            // The field names required from the NOMIS website catalogue page are declared through
            // Schema further down (title = "Description", link = "oaurl"). Per the original notes the
            // catalogue has no tags field and no description field.
            CSVCatalogue catalogueReader = new CSVCatalogue();
            string cataloguePath = Path.Combine(DataRootDir, CatalogueFile);
            this.Catalogue = catalogueReader.ReadCatalogue(cataloguePath);

            // A pattern filter is used instead of FileFilterEnum.Top: the filter had to be changed to
            // stop the CODE0.CSV file being returned instead of the data file.
            FileFilterOptions = new FileFilter(FileFilterEnum.Pattern, "DATA.CSV");

            // Geometry weights: favour the 2011 datasets over the older ones.
            const float Census2011Weight = 2.0f;
            const float OlderGeometryWeight = 0.1f;
            SetGeometryHint("OA_2011", Census2011Weight);
            SetGeometryHint("OA", OlderGeometryWeight);
            SetGeometryHint("LSOA_2011", Census2011Weight);
            SetGeometryHint("LSOA", OlderGeometryWeight);
            SetGeometryHint("MSOA_2011", Census2011Weight);
            SetGeometryHint("MSOA", OlderGeometryWeight);

            // Schema describing what the catalogue columns are.
            Schema = new DatastoreSchema();
            Schema.AddField("TableName", SemanticFieldType.UniqueKey);
            Schema.AddField("Description", SemanticFieldType.Title);
            // The catalogue holds two links to data: "oaurl" (oa/lsoa/msoa) and "wardurl" (wards).
            // Only the first is registered; "wardurl" is deliberately left out.
            Schema.AddField("oaurl", SemanticFieldType.Link);

            // Variable lookup. An earlier version built a Dictionary<string, string> called
            // VariableNameDescriptionText (variable code -> plain text description) by parsing the
            // variables file line by line with CSVCatalogue.ParseCSVLine (needed for the quoted final
            // column). That duplicated the table loaded below, so it is no longer built.
            //
            // The table holds all the data about each individual variable, for example:
            //   ColumnVariableCode,ColumnVariableMeasurementUnit,ColumnVariableStatisticalUnit,ColumnVariableDescription
            //   KS101EW0001,Count,Person,All categories: Sex
            //   KS101EW0002,Count,Person,Males
            // It is used for the short and long description text.
            CSVCatalogue variablesReader = new CSVCatalogue();
            string variablesPath = Path.Combine(DataRootDir, VariablesFile);
            VariableMetaData = variablesReader.ReadCatalogue(variablesPath);

            // Key the table on the variable code column.
            DataColumn variableCodeColumn = VariableMetaData.Columns["ColumnVariableCode"];
            VariableMetaData.PrimaryKey = new DataColumn[] { variableCodeColumn };
        }
Exemplo n.º 2
0
        //constructor?

        /// <summary>
        /// Sets up the London Datastore: reads the catalogue CSV and declares which of its columns
        /// hold the title, long description and CSV link.
        /// </summary>
        public LondonDatastore()
        {
            // Load the catalogue file from the data root directory.
            CSVCatalogue catalogueReader = new CSVCatalogue();
            string cataloguePath = Path.Combine(DataRootDir, CatalogueFile);
            this.Catalogue = catalogueReader.ReadCatalogue(cataloguePath);

            // Schema describing the field names in the LondonDatastore data that are required for
            // processing. This replaces the older TitleField / LinkField / DescriptionField settings
            // ("TITLE", "CSV_URL", "LONGDESC"); no tags field is declared.
            Schema = new DatastoreSchema();
            Schema.AddField("TITLE", SemanticFieldType.Title);
            Schema.AddField("LONGDESC", SemanticFieldType.Description);
            Schema.AddField("CSV_URL", SemanticFieldType.Link);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Sets up the Gov datastore: reads the datasets catalogue and the resources catalogue (both
        /// with CR-only line endings) and declares the 2016 schema columns for title, description,
        /// link and tags.
        /// </summary>
        public GovDatastore()
        {
            // The catalogue file only uses a CR, so the default line endings are overridden. The same
            // reader, and therefore the same line-ending setting, is used for both files.
            CSVCatalogue catalogueReader = new CSVCatalogue { LineEndings = "\r" };

            string datasetsPath = Path.Combine(DataRootDir, CatalogueFile);
            this.Catalogue = catalogueReader.ReadCatalogue(datasetsPath);

            string resourcesPath = Path.Combine(DataRootDir, CatResourcesFile);
            this.ResourcesDT = catalogueReader.ReadCatalogue(resourcesPath);

            // Column layout (as of 4 April 2016).
            // datasets has: Name,Title,URL,Organization,Top level organisation,License,Published,NII,Location,Import source,Author,Geographic Coverage,Isopen,License,License Id,Maintainer,Mandate,Metadata Created,Metadata Modified,Notes,Odi Certificate,ODI Certificate URL,Tags,Temporal Coverage From,Temporal Coverage To,Primary Theme,Secondary Themes,Update Frequency,Version
            // resources has: Dataset Name,URL,Format,Description,Resource ID,Position,Date,Organization,Top level organization
            // The intended join is datasets "Name" to resources "Dataset Name".
            //
            // TODO: the join is not implemented; none of it works yet. The dataset name in the
            // resources file is not unique: it contains multiple entries for all the resources
            // attached to a dataset. That means two tables have to be handled and the descriptions
            // merged together somehow.
            //
            // The abandoned draft did the following:
            //   1. made "Dataset Name" the primary key of the resources table;
            //   2. manually added string columns "URL2", "Format", "Description", "Resource ID",
            //      "Position" and "Date" to the catalogue ("URL2" because the catalogue csv file
            //      already has a "URL" column);
            //   3. for every catalogue row, looked up the resource row by the catalogue "Name" and
            //      copied those six values across, writing a debug message when no resource matched.
            //
            // Older notes kept for reference: resource-0-format is CSV (also look at RDF etc); also
            // note bbox-east-long, bbox-north-lat, bbox-south-lat, bbox-west-long,
            // spatial-reference-system and spatial, which contains a polygon box.
            //
            // The 2012 schema used "title" (Title), "notes_rendered" (Description), "resource-0-url"
            // (Link) and "tags" (Tags). The 2016 schema below replaces it.
            //
            // NOTE(review): "Description" and "URL2" do not appear in the datasets column list above;
            // they would only be added to the catalogue by the join that is currently disabled.
            // "Notes" and "URL" were the alternatives considered. Confirm how these two fields are
            // resolved before relying on them.
            Schema = new DatastoreSchema();
            Schema.AddField("Title", SemanticFieldType.Title);
            Schema.AddField("Description", SemanticFieldType.Description);
            Schema.AddField("URL2", SemanticFieldType.Link);
            Schema.AddField("Tags", SemanticFieldType.Tags);
        }