/// <summary>
/// Reads the header of a csv file and registers one column in the data store for every header field
/// that is not recognised as a longitude or latitude field.
/// </summary>
/// <param name="file">Path of the csv file to read</param>
/// <param name="dataStore">Data store whose DataSource.Columns collection receives the extracted columns</param>
/// <param name="delimiter">Field delimiter handed over to the csv reader configuration</param>
/// <param name="hasHeader">Handed over to the csv reader configuration as HasHeaderRecord; when true, the first record is read before the header is parsed</param>
/// <param name="colNamesRemap">Optional dictionary keyed by safe column name; when a key matches, its value is used as the column name instead</param>
/// <param name="colTypesMap">Optional dictionary keyed by the final (possibly remapped) column name; columns not listed get ColumnDataType.String</param>
public static void ExtractCsvColumns(
    string file,
    DataStore dataStore,
    string delimiter = ";",
    bool hasHeader = true,
    Dictionary<string, string> colNamesRemap = null,
    Dictionary<string, ColumnDataType> colTypesMap = null
)
{
    using (var textReader = new StreamReader(file))
    using (var csv = new CsvReader(textReader))
    {
        csv.Configuration.Delimiter = delimiter;
        csv.Configuration.HasHeaderRecord = hasHeader;

        //the first record needs to be consumed before the header can be parsed
        if (hasHeader)
        {
            csv.Read();
        }

        //NOTE(review): the header is parsed also when hasHeader is false - confirm the csv reader accepts that
        csv.ReadHeader();

        foreach (var headerName in csv.Context.HeaderRecord)
        {
            //coordinate fields do not become columns
            //NOTE(review): geometry (wkt) fields are not skipped here, while ExtractCsvData maps them to "wkt" - confirm this is intended
            var isCoordinateField = FlatLonProps.Contains(headerName) || FlatLatProps.Contains(headerName);
            if (isCoordinateField)
            {
                continue;
            }

            if (!CheckIfObjectSafe(headerName))
            {
                throw MapHive.Core.DataModel.Validation.Utils.GenerateValidationFailedException("ColName", "bad_col_name", "Column name contains forbidden words");
            }

            //db safe name first, then the optional caller supplied rename
            var columnName = GetSafeDbObjectName(headerName);
            if (colNamesRemap != null && colNamesRemap.TryGetValue(columnName, out var remappedName))
            {
                columnName = remappedName;
            }

            //type is looked up by the final column name; string when nothing has been enforced
            ColumnDataType columnType;
            if (colTypesMap == null || !colTypesMap.TryGetValue(columnName, out columnType))
            {
                columnType = ColumnDataType.String;
            }

            dataStore.DataSource.Columns.Add(new Column
            {
                Type = columnType,
                Name = columnName,
                FriendlyName = headerName
            });
        }
    }
}
/// <summary> /// Extracts lo/la from a json object /// </summary> /// <param name="jObj"></param> /// <param name="lo"></param> /// <param name="la"></param> /// <returns></returns> protected static bool TryExtractLoLaFromJsonObject(JObject jObj, out object lo, out object la) { lo = null; la = null; var jProps = jObj.Properties(); var loProp = jProps.FirstOrDefault(p => FlatLonProps.Contains(p.Name.ToLower())); if (loProp != null) { lo = loProp.Value; } var laProp = jProps.FirstOrDefault(p => FlatLatProps.Contains(p.Name.ToLower())); if (laProp != null) { la = laProp.Value; } if (lo == null && la == null) { var locationProp = jProps.FirstOrDefault(p => FlatGeoLocationProps.Contains(p.Name.ToLower())); if (locationProp != null) { if (locationProp.Value is JArray array && array.Count == 2) { lo = array.First.Value <double>(); la = array.Last.Value <double>(); } else { TryExtractLoLaFromJsonObject((JObject)locationProp.Value, out lo, out la); } } }
/// <summary>
/// Reads all the records of a csv file into a list of dictionaries, one dictionary per csv row.
/// Longitude, latitude and geometry fields are stored under the "lo", "la" and "wkt" keys respectively;
/// every other field is stored under the data store column name matched by the column friendly name.
/// </summary>
/// <param name="file">Path of the csv file to read</param>
/// <param name="dataStore">Data store whose DataSource.Columns provide the friendly name to column name mapping and the column types</param>
/// <param name="delimiter">Field delimiter handed over to the csv reader configuration</param>
/// <param name="hasHeader">Handed over to the csv reader configuration as HasHeaderRecord; when true, the first record is read before the header is parsed</param>
/// <param name="hasGeo">When true, a row that has neither both "lo" and "la" nor "wkt" causes a validation exception</param>
/// <returns>Parsed rows in file order</returns>
public static List<Dictionary<string, object>> ExtractCsvData(string file, DataStore dataStore, string delimiter = ";", bool hasHeader = true, bool hasGeo = true)
{
    var rows = new List<Dictionary<string, object>>();

    using (var textReader = new StreamReader(file))
    using (var csv = new CsvReader(textReader))
    {
        csv.Configuration.Delimiter = delimiter;
        csv.Configuration.HasHeaderRecord = hasHeader;

        //the first record needs to be consumed before the header can be parsed
        if (hasHeader)
        {
            csv.Read();
        }

        csv.ReadHeader();

        //lookups built off the data store: csv header name -> safe db column name, safe db column name -> column type
        var columnNameByFriendlyName = new Dictionary<string, string>();
        var columnTypeByName = new Dictionary<string, ColumnDataType>();
        foreach (var column in dataStore.DataSource.Columns)
        {
            columnNameByFriendlyName[column.FriendlyName] = column.Name;
            columnTypeByName[column.Name] = column.Type;
        }

        while (csv.Read())
        {
            var row = new Dictionary<string, object>();

            foreach (var headerName in csv.Context.HeaderRecord)
            {
                //spatial fields get fixed keys, the remaining ones are resolved via the data store columns
                string targetName;
                if (FlatLonProps.Contains(headerName))
                {
                    targetName = "lo";
                }
                else if (FlatLatProps.Contains(headerName))
                {
                    targetName = "la";
                }
                else if (FlatGeomProps.Contains(headerName))
                {
                    targetName = "wkt";
                }
                else
                {
                    targetName = columnNameByFriendlyName[headerName];
                }

                var rawValue = csv.GetField(headerName);

                //fields without a declared type are parsed as strings
                if (!columnTypeByName.TryGetValue(targetName, out var targetType))
                {
                    targetType = ColumnDataType.String;
                }

                row.Add(targetName, ParseToColumnDataType(rawValue, targetType));
            }

            //spatial info means either a lo + la pair or a wkt geometry
            var hasSpatialInfo = row.ContainsKey("lo") && row.ContainsKey("la") || row.ContainsKey("wkt");
            if (hasGeo && !hasSpatialInfo)
            {
                throw MapHive.Core.DataModel.Validation.Utils.GenerateValidationFailedException("FlatData", "no_geo", "Flat file does not contain any spatial information (usually lon / lat)");
            }

            rows.Add(row);
        }
    }

    return rows;
}
/// <summary>
/// Processes a flat json file found in the specified directory. The file has to contain an array of objects;
/// an object is imported only when it provides lon / longitude and lat / latitude properties, a location property
/// the coordinates can be extracted from, or a geometry property. Coords are assumed to be in lon/lat.
/// </summary>
/// <param name="dbCtx">Db context used to create the data store and passed on to the flat data processing</param>
/// <param name="path">Directory the uploaded data resides in; a zip archive found there is extracted first</param>
/// <param name="dsc">Data source credentials used when obtaining the data store</param>
/// <returns>Data store returned by the flat data processing</returns>
/// <remarks>
/// A validation exception is thrown when no json file is found, when the file content is not a json array,
/// or when a property name is rejected by the object name safety check.
/// </remarks>
public static async Task<DataStore> ProcessJson(DbContext dbCtx, string path, DataSourceCredentials dsc)
{
    //assuming a single zip can only be present in a directory, as uploading data for a single layer
    //if there is a zip archive, need to extract it
    ExtractZip(path);

    //test for the required json file presence...
    var file = Directory.GetFiles(path, "*.json").FirstOrDefault();
    if (string.IsNullOrEmpty(file))
    {
        throw MapHive.Core.DataModel.Validation.Utils.GenerateValidationFailedException("JSON", "no_json_file", "JSON file has not been found");
    }

    var fName = Path.GetFileNameWithoutExtension(file);
    var output = GetDataStore(fName, "json", dsc);

    //type pattern also rejects a null deserialization result (empty file, literal null),
    //which would otherwise surface as a null reference exception
    if (!(JsonConvert.DeserializeObject(File.ReadAllText(file)) is JArray jsonRecords))
    {
        throw MapHive.Core.DataModel.Validation.Utils.GenerateValidationFailedException("JSON", "not_array", "JSON file has not been deserialized to array");
    }

    var data = new List<Dictionary<string, object>>(jsonRecords.Count);

    foreach (var jToken in jsonRecords)
    {
        //array items that are not objects have no properties, so cannot provide any spatial info - skip instead of failing on a cast
        if (!(jToken is JObject jRec))
        {
            continue;
        }

        var rec = new Dictionary<string, object>();

        foreach (var jProp in jRec.Properties())
        {
            //invariant lower casing, so property matching does not depend on the current culture
            var propName = jProp.Name.ToLowerInvariant();

            //basically looking at a flat json file, BUT allowing convenience parsing for some nested properties
            if (FlatLonProps.Contains(propName))
            {
                rec.Add("lo", GetValueFromJsonProperty(jProp.Value));
            }
            else if (FlatLatProps.Contains(propName))
            {
                rec.Add("la", GetValueFromJsonProperty(jProp.Value));
            }
            else if (FlatGeoLocationProps.Contains(propName))
            {
                if (jProp.Value is JObject locationObj)
                {
                    if (TryExtractLoLaFromJsonObject(locationObj, out var lo, out var la))
                    {
                        rec.Add("lo", lo);
                        rec.Add("la", la);
                    }
                }
                else if (jProp.Value is JArray coords && coords.Count == 2)
                {
                    //two element array read as [lon, lat], the same way TryExtractLoLaFromJsonObject reads a nested location array
                    rec.Add("lo", coords.First.Value<double>());
                    rec.Add("la", coords.Last.Value<double>());
                }
                //any other location value (null, scalar, ...) provides no coordinates and is ignored
            }
            else if (FlatGeomProps.Contains(propName))
            {
                rec.Add("wkt", GetValueFromJsonProperty(jProp.Value));
            }
            else
            {
                rec.Add(jProp.Name, GetValueFromJsonProperty(jProp.Value));
            }
        }

        //records without spatial info are dropped
        if (rec.ContainsKey("lo") && rec.ContainsKey("la") || rec.ContainsKey("wkt"))
        {
            data.Add(rec);
        }
    }

    //work out a data model - this is json, so can be totally unpredictable
    foreach (var rec in data)
    {
        foreach (var fProp in rec)
        {
            //ignore lon/lat/wkt, this will be turned into a geometry
            //testing for normalized keys only as data is already normalized
            if (fProp.Key == "lo" || fProp.Key == "la" || fProp.Key == "wkt")
            {
                continue;
            }

            //worked out once per property rather than once per already known column
            var safeColName = GetSafeDbObjectName(fProp.Key);
            if (output.DataSource.Columns.Any(c => c.Name == safeColName))
            {
                continue;
            }

            if (!CheckIfObjectSafe(fProp.Key))
            {
                throw MapHive.Core.DataModel.Validation.Utils.GenerateValidationFailedException("ColName", "bad_col_name", "Column name contains forbidden words");
            }

            //a null value carries no type information; skip it, so a later record with an actual value can define the column
            if (fProp.Value == null)
            {
                continue;
            }

            var colType = SystemTypeToColumnDataType(fProp.Value.GetType());
            if (colType != ColumnDataType.Unknown)
            {
                output.DataSource.Columns.Add(new Column
                {
                    Type = colType,
                    Name = safeColName,
                    FriendlyName = fProp.Key
                });
            }
        }
    }

    //create object straight away, so when something goes wrong with import, etc. there is a chance for a cleanup bot
    //to pick it up and cleanup the orphaned data when necessary
    await output.CreateAsync(dbCtx);

    return await ProcessFlatData(dbCtx, output, data);
}