/// <summary>
/// Extracts data from a csv file
/// </summary>
/// <param name="file">Path to the csv file</param>
/// <param name="dataStore">Data store describing the destination columns</param>
/// <param name="delimiter">Field delimiter used in the file</param>
/// <param name="hasHeader">Whether the file contains a header record</param>
/// <param name="hasGeo">Whether the file is required to contain spatial information (lon/lat or wkt)</param>
/// <returns>A list of records keyed by safe db column names</returns>
public static List<Dictionary<string, object>> ExtractCsvData(string file, DataStore dataStore, string delimiter = ";", bool hasHeader = true, bool hasGeo = true)
{
    var data = new List<Dictionary<string, object>>();

    using (var rdr = new StreamReader(file))
    using (var csvRdr = new CsvReader(rdr))
    {
        csvRdr.Configuration.Delimiter = delimiter;
        csvRdr.Configuration.HasHeaderRecord = hasHeader;

        if (hasHeader)
        {
            csvRdr.Read();
        }
        csvRdr.ReadHeader();

        //create col names map from data store, so csv header col names can be matched to safe db col names
        var colNamesMap = new Dictionary<string, string>();
        var colTypesMap = new Dictionary<string, ColumnDataType>();
        foreach (var c in dataStore.DataSource.Columns)
        {
            colNamesMap[c.FriendlyName] = c.Name;
            colTypesMap[c.Name] = c.Type;
        }

        //data
        while (csvRdr.Read())
        {
            var rec = new Dictionary<string, object>();

            foreach (var colName in csvRdr.Context.HeaderRecord)
            {
                var cName = string.Empty;
                if (FlatLonProps.Contains(colName))
                {
                    cName = "lo";
                }
                else if (FlatLatProps.Contains(colName))
                {
                    cName = "la";
                }
                else if (FlatGeomProps.Contains(colName))
                {
                    cName = "wkt";
                }
                else
                {
                    cName = colNamesMap[colName];
                }

                rec.Add(
                    cName,
                    ParseToColumnDataType(
                        csvRdr.GetField(colName),
                        colTypesMap.ContainsKey(cName) ? colTypesMap[cName] : ColumnDataType.String
                    )
                );
            }

            if (hasGeo && !(rec.ContainsKey("lo") && rec.ContainsKey("la") || rec.ContainsKey("wkt")))
            {
                throw MapHive.Core.DataModel.Validation.Utils.GenerateValidationFailedException(
                    "FlatData",
                    "no_geo",
                    "Flat file does not contain any spatial information (usually lon / lat)"
                );
            }

            data.Add(rec);
        }
    }

    return data;
}
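// Usage sketch (not part of the original source; the file path and the way the DataStore instance is obtained
// are assumptions): reads a semicolon-delimited csv with a header row into records keyed by safe db column names.
public static List<Dictionary<string, object>> ExtractCsvDataUsageSketch(DataStore dataStore)
{
    // each record maps safe db column names (plus "lo" / "la" or "wkt") to values parsed via ParseToColumnDataType
    return ExtractCsvData(@"c:\uploads\points.csv", dataStore, delimiter: ";", hasHeader: true, hasGeo: true);
}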
/// <summary>
/// Processes a flat json file; the file has to have numeric lon / longitude and lat / latitude properties; coords are assumed to be in lon/lat
/// </summary>
/// <param name="dbCtx">Database context used to persist the data store</param>
/// <param name="path">Directory containing the uploaded json file (or a zip archive with one)</param>
/// <param name="dsc">Credentials of the data source the data should be written to</param>
/// <returns>The created data store</returns>
public static async Task<DataStore> ProcessJson(DbContext dbCtx, string path, DataSourceCredentials dsc)
{
    //assuming a single zip can only be present in a directory, as uploading data for a single layer;
    //if there is a zip archive, it needs to be extracted
    ExtractZip(path);

    //test for the required json file presence...
    var file = Directory.GetFiles(path, "*.json").FirstOrDefault();
    if (string.IsNullOrEmpty(file))
    {
        throw MapHive.Core.DataModel.Validation.Utils.GenerateValidationFailedException("JSON", "no_json_file", "JSON file has not been found");
    }

    var fName = Path.GetFileNameWithoutExtension(file);
    var output = GetDataStore(fName, "json", dsc);

    var json = JsonConvert.DeserializeObject(File.ReadAllText(file));
    if (json.GetType() != typeof(JArray))
    {
        throw MapHive.Core.DataModel.Validation.Utils.GenerateValidationFailedException("JSON", "not_array", "JSON file has not been deserialized to an array");
    }

    var data = new List<Dictionary<string, object>>(((JArray)json).Count);

    foreach (JObject jRec in (JArray)json)
    {
        var rec = new Dictionary<string, object>();

        foreach (var jProp in jRec.Properties())
        {
            var propName = jProp.Name.ToLower();

            //basically looking at a flat json file, BUT allowing convenience parsing for some nested properties
            if (FlatLonProps.Contains(propName))
            {
                rec.Add("lo", GetValueFromJsonProperty(jProp.Value));
            }
            else if (FlatLatProps.Contains(propName))
            {
                rec.Add("la", GetValueFromJsonProperty(jProp.Value));
            }
            else if (FlatGeoLocationProps.Contains(propName))
            {
                if (TryExtractLoLaFromJsonObject((JObject)jProp.Value, out var lo, out var la))
                {
                    rec.Add("lo", lo);
                    rec.Add("la", la);
                }
            }
            else if (FlatGeomProps.Contains(propName))
            {
                rec.Add("wkt", GetValueFromJsonProperty(jProp.Value));
            }
            else
            {
                rec.Add(jProp.Name, GetValueFromJsonProperty(jProp.Value));
            }
        }

        //only keep records that carry some spatial information
        if (rec.ContainsKey("lo") && rec.ContainsKey("la") || rec.ContainsKey("wkt"))
        {
            data.Add(rec);
        }
    }

    //work out a data model - this is json, so it can be totally unpredictable
    foreach (var rec in data)
    {
        foreach (var fProp in rec)
        {
            //ignore lon/lat/wkt, this will be turned into a point;
            //testing for lo/la only as data is already normalized
            if (fProp.Key == "lo" || fProp.Key == "la" || fProp.Key == "wkt")
            {
                continue;
            }

            if (output.DataSource.Columns.Any(c => c.Name == GetSafeDbObjectName(fProp.Key)))
            {
                continue;
            }

            if (!CheckIfObjectSafe(fProp.Key))
            {
                throw MapHive.Core.DataModel.Validation.Utils.GenerateValidationFailedException("ColName", "bad_col_name", "Column name contains forbidden words");
            }

            var colType = SystemTypeToColumnDataType(fProp.Value.GetType());
            if (colType != ColumnDataType.Unknown)
            {
                output.DataSource.Columns.Add(new Column
                {
                    Type = colType,
                    Name = GetSafeDbObjectName(fProp.Key),
                    FriendlyName = fProp.Key
                });
            }
        }
    }

    //create the object straight away, so when something goes wrong with the import, etc. there is a chance for a cleanup bot
    //to pick it up and clean up the orphaned data when necessary
    await output.CreateAsync(dbCtx);

    return await ProcessFlatData(dbCtx, output, data);
}
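// Usage sketch (not part of the original source; the directory path is an assumption): processes a flat json
// upload and returns the created data store. Each json record has to carry lon/lat style properties (or a wkt
// geometry) to be imported; records without spatial information are skipped.
public static async Task<DataStore> ProcessJsonUsageSketch(DbContext dbCtx, DataSourceCredentials dsc)
{
    return await ProcessJson(dbCtx, @"c:\uploads\layer-upload", dsc);
}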