/// <summary>
/// Compares a watershed read back from disk against the in-memory baseline
/// and reports any difference to the console and the warning log.
/// </summary>
/// <param name="filename">File the <paramref name="watershed"/> was read from (shown when a difference is found).</param>
/// <param name="baseWaterShedData">Reference (expected) watershed data.</param>
/// <param name="watershed">Watershed that was read back from disk.</param>
private static void Compare(string filename, Watershed baseWaterShedData, Watershed watershed)
{
    Console.WriteLine();
    Console.Write("checking for any differences..");

    // Relies on Watershed.Equals doing the deep comparison; earlier per-location
    // comparison code was removed because .Equals is not guaranteed to be
    // overridden by every implementation.
    if (!baseWaterShedData.Equals(watershed))
    {
        Console.WriteLine(filename);
        // Fixed typo: "read form disk" -> "read from disk".
        Console.WriteLine("WARNING: watershed read from disk was different!");
        LogWarning("Difference found ");
    }
    else
    {
        Console.WriteLine(" ok.");
    }
}
/// <summary>
/// Reads one CSV forecast file per day for the inclusive date range in
/// parallel and merges the results into a single <see cref="Watershed"/>.
/// </summary>
/// <param name="watershedName">Name of the watershed to read.</param>
/// <param name="startDate">First issue date (inclusive).</param>
/// <param name="endDate">Last issue date (inclusive).</param>
/// <returns>The populated watershed, or null when the dates are invalid.</returns>
public Watershed ReadParallel(string watershedName, DateTime startDate, DateTime endDate)
{
    if (!ValidDates(startDate, endDate))
        return null;

    var result = new Watershed(watershedName);

    // Each forecast is one day; the count includes both endpoints.
    int dayCount = (int)Math.Round((endDate - startDate).TotalDays) + 1;

    Parallel.For(0, dayCount, dayOffset =>
    {
        DateTime issueDate = startDate.AddDays(dayOffset);
        var csv = Read(watershedName, issueDate);
        if (csv == null)
            return;

        // Watershed mutation is not thread-safe; serialize all writes to the output.
        lock (result)
        {
            foreach (string locationName in csv.LocationNames)
            {
                Forecast forecast = result.AddForecast(locationName, issueDate, csv.GetEnsemble(locationName), csv.TimeStamps);
                forecast.TimeStamps = csv.TimeStamps;
            }
        }
    });

    return result;
}
/// <summary>
/// Reads an entire watershed from HDF5, one forecast per
/// "Watersheds/{name}/{location}/{year_dayOfYear}" group containing
/// "Times" (ticks) and "Values" (float matrix) datasets.
/// </summary>
/// <param name="h5r">Open HDF5 reader.</param>
/// <param name="watershedName">Watershed group name under the root.</param>
/// <returns>The reconstructed watershed.</returns>
public static Watershed Read(H5Reader h5r, string watershedName)
{
    string root = Path(H5Reader.Root, "Watersheds", watershedName);
    long[] ticks = null;
    float[,] values = null;
    var result = new Watershed(watershedName);

    foreach (var locationName in h5r.GetGroupNames(root))
    {
        foreach (var forecastName in h5r.GetGroupNames(Path(root, locationName)))
        {
            // e.g. Watersheds/EastSierra/BCAC1/2013_307
            string forecastPath = Path(root, locationName, forecastName);

            // Group name encodes the issue date; skip (and report) malformed paths.
            if (!TryParseIssueDate(forecastName, out DateTime issueDate))
            {
                Console.WriteLine("ERROR IN HDF5 PATH: " + forecastPath);
                continue;
            }

            h5r.ReadDataset(Path(forecastPath, "Times"), ref ticks);
            h5r.ReadDataset(Path(forecastPath, "Values"), ref values);

            var timeStamps = ticks.Select(t => new DateTime(t)).ToArray();
            result.AddForecast(locationName, issueDate, values, timeStamps);
        }
    }

    return result;
}
/// <summary>
/// Reads daily CSV forecasts for the inclusive date range, sequentially,
/// and accumulates them into a single <see cref="Watershed"/>.
/// </summary>
/// <param name="watershedName">Name of the watershed to read.</param>
/// <param name="startDate">First issue date (inclusive).</param>
/// <param name="endDate">Last issue date (inclusive).</param>
/// <returns>The populated watershed, or null when the dates are invalid.</returns>
public Watershed Read(string watershedName, DateTime startDate, DateTime endDate)
{
    if (!ValidDates(startDate, endDate))
        return null;

    var result = new Watershed(watershedName);

    // One CSV per day; days with no file are simply skipped.
    for (DateTime day = startDate; day <= endDate; day = day.AddDays(1))
    {
        var csv = Read(watershedName, day);
        if (csv == null)
            continue;

        foreach (string locationName in csv.LocationNames)
        {
            Forecast forecast = result.AddForecast(locationName, day, csv.GetEnsemble(locationName), csv.TimeStamps);
            forecast.TimeStamps = csv.TimeStamps;
        }
    }

    return result;
}
/// <summary>
/// Reads ensemble forecasts for <paramref name="watershedName"/> from a DSS file,
/// grouping consecutive catalog records into one forecast per issue date.
/// </summary>
/// <param name="watershedName">Watershed (DSS A-part) to read; matched case-insensitively.</param>
/// <param name="start">Earliest issue date (inclusive).</param>
/// <param name="end">Latest issue date (inclusive).</param>
/// <param name="dssPath">Path of the DSS file to read.</param>
/// <returns>The reconstructed watershed.</returns>
/// <exception cref="Exception">Thrown when the DSS catalog is empty.</exception>
public static Watershed Read(string watershedName, DateTime start, DateTime end, string dssPath)
{
    Watershed rval = new Watershed(watershedName);
    // DssReader.UseTrainingWheels = false;
    using (DssReader dss = new DssReader(dssPath, DssReader.MethodID.MESS_METHOD_GENERAL_ID, DssReader.LevelID.MESS_LEVEL_NONE))
    {
        Console.WriteLine("Reading " + dssPath);
        DssPathCollection dssPaths = dss.GetCatalog(); // sorted
        int size = dssPaths.Count;
        if (size == 0)
        {
            throw new Exception("Empty DSS catalog");
        }
        // Example path: /RUSSIANNAPA/APCC1/FLOW/01SEP2019/1HOUR/C:000002|T:0212019/
        var seriesList = new List<Hec.Dss.TimeSeries>();
        for (int i = 0; i < size; i++)
        {
            // Progress dot every 100 catalog records.
            if (i % 100 == 0)
            {
                Console.Write(".");
            }
            DssPath path = dssPaths[i];
            string location = path.Bpart; // NOTE(review): unused local — candidate for removal.
            float[,] ensemble = null;
            ParseFPart(path.Fpart, out int memberidx, out DateTime issueDate);
            if (issueDate >= start && issueDate <= end && string.Equals(path.Apart, watershedName, StringComparison.OrdinalIgnoreCase))
            {
                // Passing in 'path' (not the dateless string) is important, path without
                // date triggers a heinous case in the dss low-level code
                var ts = dss.GetTimeSeries(path);
                // Flush the accumulated members when a new forecast starts, or when this
                // is the final catalog entry (which is added before the flush).
                // NOTE(review): if the final record also starts a NEW forecast, it is
                // merged into the previous forecast's ensemble — confirm catalog ordering
                // and NextForecast semantics make that case impossible.
                if (NextForecast(seriesList, ts) || i == size - 1)
                {
                    if (i == size - 1)
                    {
                        seriesList.Add(ts);
                    }
                    ConvertListToEnsembleArray(seriesList, ref ensemble);
                    rval.AddForecast(path.Bpart, issueDate, ensemble, ts.Times);
                    seriesList.Clear();
                }
                seriesList.Add(ts);
            }
        }
    }
    return (rval);
}
/// <summary>
/// Writes each ensemble member of every forecast to a DSS file as an
/// individual time series. F-part encodes member and issue date, e.g.
/// "C:000002|T:0212019" (member 2, day 021, year 2019).
/// </summary>
/// <param name="dssFileName">Target DSS file.</param>
/// <param name="watershed">Watershed whose forecasts are written.</param>
public static void Write(string dssFileName, Watershed watershed)
{
    bool saveAsFloat = true;
    float[] ensembleMember = null; // reusable row buffer, filled by f.EnsembleMember
    int count = 0;
    using (var w = new DssWriter(dssFileName, DssReader.MethodID.MESS_METHOD_GLOBAL_ID, DssReader.LevelID.MESS_LEVEL_CRITICAL))
    {
        foreach (Location loc in watershed.Locations)
        {
            // Progress dot every 100 series written (checked once per location).
            if (count % 100 == 0)
            {
                Console.Write(".");
            }
            // NOTE(review): memberCounter resets per LOCATION, not per forecast, so
            // the "C:" member number keeps increasing across a location's forecasts.
            // Confirm whether it should restart at 1 for each forecast.
            int memberCounter = 0;
            foreach (Forecast f in loc.Forecasts)
            {
                int size = f.Ensemble.GetLength(0);
                for (int i = 0; i < size; i++)
                {
                    // Copy member i's values into the reusable buffer.
                    f.EnsembleMember(i, ref ensembleMember);
                    memberCounter++;
                    var t = f.IssueDate;
                    // Example: /RUSSIANNAPA/APCC1/FLOW/01SEP2019/1HOUR/C:000002|T:0212019/
                    string F = "C:" + memberCounter.ToString().PadLeft(6, '0') + "|T:" + t.DayOfYear.ToString().PadLeft(3, '0') + t.Year.ToString();
                    var path = "/" + watershed.Name.ToString() + "/" + loc.Name + "/Flow//1Hour/" + F + "/";
                    Hec.Dss.TimeSeries timeseries = new Hec.Dss.TimeSeries
                    {
                        Values = Array.ConvertAll(ensembleMember, item => (double)item),
                        Units = "",
                        DataType = "INST-VAL",
                        Path = new DssPath(path),
                        StartDateTime = f.TimeStamps[0]
                    };
                    w.Write(timeseries, saveAsFloat);
                    count++;
                }
            }
        }
    }
}
/// <summary>
/// Runs one small write pass so JIT compilation settles before the timed
/// benchmarks; results from this pass are not logged.
/// </summary>
/// <param name="watershedName">Watershed used for the warmup data set.</param>
private static void Warmup(string watershedName)
{
    Console.WriteLine("Warmup time period, results will not be logged.");
    DisableTestReporting = true;

    // A longer warmup would be nicer, but writing all formats is very slow.
    const int daysWarmup = 3;
    var reader = new CsvEnsembleReader(CacheDir);
    Watershed warmupData = reader.Read(watershedName, StartTime, StartTime.AddDays(daysWarmup));
    WriteAllFormats(warmupData, true);

    DisableTestReporting = false;
    Console.WriteLine("Finished Warmup.");
}
/// <summary>
/// Creates a new <see cref="Watershed"/> containing at most
/// <paramref name="takeCount"/> forecasts copied from this one,
/// taken in location/forecast iteration order.
/// </summary>
/// <param name="takeCount">Maximum number of forecasts to copy.</param>
/// <returns>A watershed with up to <paramref name="takeCount"/> forecasts.</returns>
public Watershed CloneSubset(int takeCount)
{
    int count = 0;
    var retn = new Watershed(this.Name);
    foreach (Location loc in this.Locations)
    {
        foreach (Forecast f in loc.Forecasts)
        {
            // BUG FIX: the original used 'break' after adding, which only exited
            // the inner loop — each subsequent location contributed one extra
            // forecast, so the result could exceed takeCount. Check before adding
            // and return to stop both loops.
            if (count >= takeCount)
            {
                return retn;
            }
            retn.AddForecast(f.Location.Name, f.IssueDate, f.Ensemble, f.TimeStamps);
            count++;
        }
    }
    return retn;
}
/// <summary>
/// Writes every forecast in <paramref name="watershed"/> into the SQLite
/// ensemble table, one row per forecast, with optional compression of the
/// ensemble byte array.
/// </summary>
/// <param name="filename">SQLite database file to write to.</param>
/// <param name="watershed">Watershed whose forecasts are stored.</param>
/// <param name="compress">When true, ensemble bytes are compressed before insert.</param>
/// <param name="createPiscesDB">Unused here; kept for signature parity with WriteWithDataTable.</param>
public static void Write(string filename, Watershed watershed, bool compress = false, bool createPiscesDB = false)
{
    using (SqLiteEnsembleWriter server = new SqLiteEnsembleWriter(filename))
    {
        // Continue IDs from whatever is already in the table.
        int index = server.MaxID();
        // Scratch buffer reused across ConvertToBytes calls to avoid reallocation.
        byte[] uncompressed = null;

        // Removed: unused local 'Reclamation.TimeSeries.TimeSeriesDatabase db'.
        foreach (Location loc in watershed.Locations)
        {
            foreach (Forecast f in loc.Forecasts)
            {
                index++;
                server.InsertEnsemble(index, f.IssueDate, watershed.Name, loc.Name,
                    f.TimeStamps[0], f.Ensemble.GetLength(1), f.Ensemble.GetLength(0),
                    compress, ConvertToBytes(f.Ensemble, compress, ref uncompressed));
            }
        }
    }
}
/// <summary>
/// Writes each forecast as a single DSS time-series profile, e.g.
/// /RUSSIANNAPA/APCC1/Ensemble-FLOW/01SEP2019/1HOUR/T:0212019/
/// </summary>
/// <param name="dssFileName">Target DSS file.</param>
/// <param name="watershed">Watershed whose forecasts are written.</param>
public static void WriteToTimeSeriesProfiles(string dssFileName, Watershed watershed)
{
    bool saveAsFloat = true;
    int written = 0;
    double[,] transposed = null;

    using (var writer = new DssWriter(dssFileName, DssReader.MethodID.MESS_METHOD_GLOBAL_ID, DssReader.LevelID.MESS_LEVEL_NONE))
    {
        foreach (Location loc in watershed.Locations)
        {
            foreach (Forecast f in loc.Forecasts)
            {
                // Profiles want double columns; the ensemble is stored as float rows.
                ArrayUtility.TransposeFloatToDouble(f.Ensemble, ref transposed);

                // Progress dot every 100 profiles.
                if (written % 100 == 0)
                    Console.Write(".");

                // F-part encodes the issue date: |T:dddyyyy
                string fPart = "|T:" + f.IssueDate.DayOfYear.ToString().PadLeft(3, '0') + f.IssueDate.Year.ToString();
                string path = "/" + watershed.Name.ToString() + "/" + loc.Name + "/Ensemble-Flow//1Hour/" + fPart + "/";

                var profile = new TimeSeriesProfile
                {
                    StartDateTime = f.IssueDate,
                    // Column labels are just 1-based member indices.
                    ColumnValues = Array.ConvertAll(Enumerable.Range(1, transposed.GetLength(1)).ToArray(), x => (double)x),
                    DataType = "INST-VAL",
                    Path = new DssPath(path),
                    Values = transposed,
                    ColumnUnits = "cfs"
                };

                writer.Write(profile, saveAsFloat);
                written++;
            }
        }
    }
}
/// <summary>
/// Reads time-series profiles from a DSS file into a <see cref="Watershed"/>,
/// keeping only records whose issue date falls within [start, end].
/// </summary>
/// <param name="watershedName">Watershed (DSS A-part); matched case-insensitively.</param>
/// <param name="start">Earliest issue date (inclusive).</param>
/// <param name="end">Latest issue date (inclusive).</param>
/// <param name="dssFileName">Path of the DSS file to read.</param>
/// <returns>The reconstructed watershed.</returns>
/// <exception cref="Exception">Thrown when the DSS catalog is empty.</exception>
public static Watershed ReadTimeSeriesProfiles(string watershedName, DateTime start, DateTime end, string dssFileName)
{
    Watershed rval = new Watershed(watershedName);
    float[,] profile = null;
    using (DssReader dss = new DssReader(dssFileName, DssReader.MethodID.MESS_METHOD_GENERAL_ID, DssReader.LevelID.MESS_LEVEL_NONE))
    {
        // Fixed missing space in log message ("Reading" + file name ran together).
        Console.WriteLine("Reading " + dssFileName);
        DssPathCollection dssPaths = dss.GetCatalog(); // sorted
        int size = dssPaths.Count;
        if (size == 0)
        {
            throw new Exception("Empty DSS catalog");
        }
        // Example path: /RUSSIANNAPA/APCC1/FLOW/01SEP2019/1HOUR/|T:0212019/
        for (int i = 0; i < size; i++)
        {
            // Progress dot every 100 catalog records.
            if (i % 100 == 0)
            {
                Console.Write(".");
            }
            DssPath path = dssPaths[i];
            DateTime issueDate = ParseIssueDate(path.Fpart);
            // Case-insensitive A-part match, consistent with DssEnsemble.Read
            // (the original used a case-sensitive ==).
            if (issueDate >= start && issueDate <= end
                && string.Equals(path.Apart, watershedName, StringComparison.OrdinalIgnoreCase))
            {
                var ts = dss.GetTimeSeriesProfile(path);
                ArrayUtility.TransposeDoubleToFloat(ts.Values, ref profile);
                rval.AddForecast(path.Bpart, issueDate, profile, ts.Times);
            }
        }
    }
    return rval;
}
/// <summary>
/// Reads forecasts for <paramref name="watershedName"/> whose issue dates fall
/// within [startTime, endTime] from the SQLite ensemble table.
/// </summary>
/// <param name="watershedName">Watershed name stored in the table.</param>
/// <param name="startTime">Earliest issue date (inclusive).</param>
/// <param name="endTime">Latest issue date (inclusive).</param>
/// <param name="fileName">SQLite database file.</param>
/// <returns>The populated watershed.</returns>
/// <exception cref="Exception">Thrown when the query returns no rows.</exception>
public static Watershed Read(string watershedName, DateTime startTime, DateTime endTime, string fileName)
{
    SQLiteServer server = GetServer(fileName);
    var rval = new Watershed(watershedName);

    // NOTE(review): SQL built by string concatenation. Inputs are internal
    // (dates are formatted here, watershed names come from our own writers),
    // but a parameterized query would be safer if the server API supports it.
    var sql = "select * from " + TableName +
        " WHERE issue_date >= '" + startTime.ToString(DateTimeFormat) + "' " +
        " AND issue_date <= '" + endTime.ToString(DateTimeFormat) + "' " +
        " AND watershed = '" + watershedName + "' ";
    sql += " order by watershed,issue_date,location_name";

    var table = server.Table(TableName, sql);
    if (table.Rows.Count == 0)
    {
        throw new Exception("no data");
    }

    // Removed: unused 'prevIssueDate' local and the redundant pre-loop
    // initialization of the current issue date.
    float[,] values = null; // scratch buffer reused by GetValues
    foreach (DataRow row in table.Rows)
    {
        DateTime issueDate = Convert.ToDateTime(row["issue_date"]);
        var times = GetTimes(row);
        GetValues(row, ref values);
        rval.AddForecast(row["location_name"].ToString(), issueDate, values, times);
    }
    return rval;
}
/// <summary>
/// Intended duplicate-forecast detection; the implementation is entirely
/// commented out, so this method is currently a no-op.
/// </summary>
/// <param name="baseWaterShedData">Watershed that would be scanned for duplicate ensembles.</param>
private static void DuplicateCheck(Watershed baseWaterShedData)
{
    // NOTE(review): the disabled code below keyed ensembles by
    // "locationName|year_dayOfYear" and counted repeats. It references an older
    // object model (Watershed.Forecasts / Ensemble) that does not match the
    // current types — verify against the current model before re-enabling.
    //var hs = new Dictionary<string, int>();
    //foreach (var wshed in baseWaterShedData.Forecasts)
    //{
    //    var wsName = wshed.WatershedName;
    //    foreach (Ensemble ie in wshed.Locations)
    //    {
    //        // This is being treated like a unique entity...
    //        string ensemblePath = ie.LocationName + "|" + ie.IssueDate.Year.ToString() + "_" + ie.IssueDate.DayOfYear.ToString();
    //        if (hs.ContainsKey(ensemblePath))
    //        {
    //            Console.WriteLine("Duplicate found.");
    //            int ct = hs[ensemblePath];
    //            hs[ensemblePath] = ct + 1;
    //        }
    //        else
    //        {
    //            hs.Add(ensemblePath, 1);
    //        }
    //    }
    //}
}
/// <summary>
/// Writes every forecast in <paramref name="watershed"/> to HDF5: one group per
/// forecast holding a "Times" dataset (DateTime ticks) and a "Values" dataset
/// with one row per ensemble member. Serial counterpart of WriteParallel.
/// </summary>
/// <param name="h5w">Open HDF5 writer.</param>
/// <param name="watershed">Watershed to serialize.</param>
public static void Write(H5Writer h5w, Watershed watershed)
{
    // One row per chunk here; WriteParallel makes the chunk size configurable.
    int chunkSize = 1;
    string root = Path(H5Reader.Root, "Watersheds", watershed.Name);
    long[] dtTicks = null; // tick buffer reused across forecasts of equal length
    foreach (Location loc in watershed.Locations)
    {
        string watershedPath = Path(root, loc.Name);
        // For each forecast
        foreach (Forecast f in loc.Forecasts)
        {
            // Group name is year_dayOfYear (e.g. "2013_307"); unique per
            // location + issue date since there is one forecast per day.
            string ensemblePath = Path(watershedPath, f.IssueDate.Year.ToString() + "_" + f.IssueDate.DayOfYear.ToString());
            h5w.CreateGroup(ensemblePath);
            // 2 datasets under this group - times and values. Write out times as long tick counts,
            // and values as floats.
            string valuePath = Path(ensemblePath, "Values");
            string timePath = Path(ensemblePath, "Times");
            // All members of a forecast share one set of timestamps; write it once.
            var dts = f.TimeStamps;
            if (dtTicks == null || dtTicks.Length != dts.Length)
            {
                dtTicks = new long[dts.Length];
            }
            for (int i = 0; i < dts.Length; i++)
            {
                dtTicks[i] = dts[i].Ticks;
            }
            h5w.WriteArray(timePath, dtTicks);
            // Dataset width equals the (shared) timestamp count.
            int firstSize = dts.Length;
            // Was initially writing columns - I think rows are better. (1 row per ensemble).
            // This way we can chunk multiple rows at a time without changing access patterns,
            // and it lets us block-copy conveniently on reads.
            h5w.Create2dExtendibleDataset<float>(valuePath, chunkSize, firstSize);
            using (var valueDset = h5w.OpenDataset(valuePath))
            {
                int width = f.Ensemble.GetLength(1);
                float[] vals = new float[width];
                // Each ensemble member is a time-series of data; append one row per member.
                for (int ensembleMember = 0; ensembleMember < f.Ensemble.GetLength(0); ensembleMember++)
                {
                    // BlockCopy offsets/counts are in BYTES, hence the sizeof(float) factors.
                    Buffer.BlockCopy(f.Ensemble, ensembleMember * width * sizeof(float), vals, 0, width * sizeof(float));
                    h5w.AddRow(valueDset, vals);
                }
            }
        }
    }
}
/// <summary>
/// Writes every forecast to the SQLite blob table via a DataTable, batching
/// saves every 1000 rows; optionally also builds a Pisces series catalog
/// (folders per watershed/location plus one series entry per forecast).
/// </summary>
/// <param name="filename">SQLite database file to write to.</param>
/// <param name="watershed">Watershed whose forecasts are stored.</param>
/// <param name="compress">When true, ensemble bytes are compressed before insert.</param>
/// <param name="createPiscesDB">When true, a Pisces series catalog is created alongside the blobs.</param>
public static void WriteWithDataTable(string filename, Watershed watershed, bool compress = false, bool createPiscesDB = false)
{
    var server = SqLiteEnsemble.GetServer(filename);
    byte[] uncompressed = null; // scratch buffer reused by ConvertToBytes
    Reclamation.TimeSeries.TimeSeriesDatabase db;
    int locIdx = 1;
    int WatershedFolderIndex = 1;
    int scIndex = 0;
    int rowCounter = 0;
    Reclamation.TimeSeries.TimeSeriesDatabaseDataSet.SeriesCatalogDataTable sc = null;
    if (createPiscesDB)
    {
        db = new Reclamation.TimeSeries.TimeSeriesDatabase(server);
        // limit how much we query.
        //var where = "id = (select max(id) from seriescatalog) or id = parentid";
        var where = "id = (select max(id) from seriescatalog)";
        sc = db.GetSeriesCatalog(where);
        WatershedFolderIndex = sc.AddFolder(watershed.Name); // creates root level folder
        scIndex = WatershedFolderIndex + 2;
    }
    else
    {
        // NOTE(review): intentionally empty — non-Pisces writes need no catalog setup.
    }
    var timeSeriesTable = GetBlobTable(server);
    int index = server.NextID("timeseries_blob", "id");
    foreach (Location loc in watershed.Locations)
    {
        if (createPiscesDB)
        {
            // One catalog folder per location under the watershed folder.
            locIdx = sc.AddFolder(loc.Name, ++scIndex, WatershedFolderIndex);
        }
        foreach (Forecast f in loc.Forecasts)
        {
            var t = f.IssueDate; // NOTE(review): unused local — candidate for removal.
            var timeseries_start_date = f.TimeStamps[0];
            index++;
            var row = timeSeriesTable.NewRow(); // create rows in separate loop first
            row["id"] = index;
            row["issue_date"] = f.IssueDate;
            row["watershed"] = watershed.Name;
            row["location_name"] = loc.Name;
            row["timeseries_start_date"] = timeseries_start_date;
            row["member_length"] = f.Ensemble.GetLength(1);
            row["member_count"] = f.Ensemble.GetLength(0);
            row["compressed"] = compress ? 1 : 0;
            row["byte_value_array"] = ConvertToBytes(f.Ensemble, compress, ref uncompressed);
            if (createPiscesDB)
            {
                // Connection string tells Pisces how to locate each member inside the blob.
                string connectionString = "timeseries_blobs.id=" + index
                    + ";member_length=" + f.Ensemble.GetLength(1)
                    + ";ensemble_member_index={member_index}"
                    + ";timeseries_start_date=" + timeseries_start_date.ToString("yyyy-MM-dd HH:mm:ss");
                scIndex = AddPiscesSeries(loc.Name, scIndex, sc, f, locIdx, connectionString);
            }
            timeSeriesTable.Rows.Add(row);
            rowCounter++;
            // Flush in batches of 1000 rows to bound memory use.
            if (rowCounter % 1000 == 0)
            {
                server.SaveTable(timeSeriesTable);
                timeSeriesTable.Rows.Clear();
                timeSeriesTable.AcceptChanges();
            }
        }
    }
    if (createPiscesDB)
    {
        server.SaveTable(sc);
    }
    // Save any remaining rows from the final partial batch.
    server.SaveTable(timeSeriesTable);
}
/// <summary>
/// Writes the watershed to every supported output format (DSS, DSS profiles,
/// SQLite, Pisces, serial HDF5, parallel HDF5 at several chunk sizes),
/// timing each write via WriteTimed.
/// </summary>
/// <param name="waterShedData">Watershed to write.</param>
/// <param name="delete">When true, pre-existing output files are deleted first.</param>
private static void WriteAllFormats(Watershed waterShedData, bool delete)
{
    File.AppendAllText(logFile, NL);
    // Removed: unused local 'dir'.
    string fn;

    // DSS 6/7
    fn = "ensemble_V7_" + tag + ".dss";
    if (delete)
    {
        File.Delete(fn);
    }
    WriteTimed(fn, tag, () => DssEnsemble.Write(fn, waterShedData));

    fn = "ensemble_V7_profiles_" + tag + ".dss";
    if (delete)
    {
        File.Delete(fn);
    }
    WriteTimed(fn, tag, () => DssEnsemble.WriteToTimeSeriesProfiles(fn, waterShedData));

    bool compress = true;

    // SQLITE
    fn = "ensemble_sqlite_" + tag + ".db";
    if (delete)
    {
        File.Delete(fn);
    }
    WriteTimed(fn, tag, () => { SqLiteEnsemble.Write(fn, waterShedData, compress, false); });

    fn = "ensemble_pisces_" + tag + ".pdb";
    if (delete)
    {
        File.Delete(fn);
    }
    WriteTimed(fn, tag, () => { SqLiteEnsemble.WriteWithDataTable(fn, waterShedData, compress, true); });

    // Serial HDF5
    fn = "ensemble_serial_1RowPerChunk.h5";
    WriteTimed(fn, tag, () =>
    {
        using (var h5w = new H5Writer(fn))
            HDF5Ensemble.Write(h5w, waterShedData);
    });

    // Parallel HDF5; -1 means "all members in one chunk".
    foreach (int c in new[] { 1, 10, -1 })
    {
        // Capture the file name in a loop-local so each timed lambda sees its own value.
        string h5fn = "ensemble_parallel_" + c.ToString() + "RowsPerChunk.h5";
        WriteTimed(h5fn, tag, () =>
        {
            using (var h5w = new H5Writer(h5fn))
                HDF5Ensemble.WriteParallel(h5w, waterShedData, c);
        });
    }
}
/// <summary>
/// Creates a named location belonging to <paramref name="watershed"/>,
/// starting with an empty forecast list.
/// </summary>
/// <param name="name">Location name.</param>
/// <param name="watershed">Owning watershed.</param>
public Location(string name, Watershed watershed)
{
    Name = name;
    Watershed = watershed;
    Forecasts = new List<Forecast>();
}
public static void WriteParallel(H5Writer h5w, Watershed watershed, int desiredChunkSize) { string root = Path(H5Reader.Root, "Watersheds", watershed.Name); var tlDt = new ThreadLocal <long[]>(() => Array.Empty <long>()); var tlValues = new ThreadLocal <float[]>(() => Array.Empty <float>()); object grpLock = new object(); foreach (Location loc in watershed.Locations) { string locationPath = Path(root, loc.Name); Parallel.ForEach(loc.Forecasts, f => { string forecastPath = Path(locationPath, f.IssueDate.Year.ToString() + "_" + f.IssueDate.DayOfYear.ToString()); // This isn't crashing, but it's causing crazy console output because of a race condition between // "Does it exist" and "Please create it" (afaict) lock (grpLock) h5w.CreateGroup(forecastPath); // 2 datasets under this group - times and values. Write out times as long tick counts, // and values as floats. Both will be column-striated for easy vertical access string valuePath = Path(forecastPath, "Values"); string timePath = Path(forecastPath, "Times"); // Based on Karl's example, all ensembles have to have the same DateTimes. Write it once. var dts = f.TimeStamps; long[] dtTicks = tlDt.Value; if (dtTicks == null || dtTicks.Length != dts.Length) { dtTicks = new long[dts.Length]; tlDt.Value = dtTicks; } for (int i = 0; i < dts.Length; i++) { dtTicks[i] = dts[i].Ticks; } // Saves a lot of time in the hdf lib I think... h5w.WriteUncompressed(timePath, dtTicks); // Again, I think this is guaranteed since we're only writing one 'times' dataset int nColumns = dts.Length; // Use -1 to mean "all members in this ensemble" int numMembers = f.Ensemble.GetLength(0); int chunkSize = desiredChunkSize; if (chunkSize == -1) { chunkSize = numMembers; } float[] buf = tlValues.Value; if (buf == null || buf.Length != nColumns * chunkSize) { buf = new float[nColumns * chunkSize]; tlValues.Value = buf; } // Was initially writing columns - I think rows are better. (1 row per ensemble). 
// This way we can chunk multiple rows at a time without changing access patterns, // and it lets us block-copy conveniently on reads. h5w.Create2dExtendibleDataset <float>(valuePath, chunkSize, nColumns); using (var valueDset = h5w.OpenDataset(valuePath)) { h5w.SetExtent(valueDset, new long[] { numMembers, nColumns }); // Each ensemble member is a time-series of data, add a new row... // Row-index within this chunk, resets whenever we write a chunk int relativeRow = 0; int[] chunks = new int[2]; chunks[0] = 0; // Row-start, will change chunks[1] = 0; // Column-start, won't change for (int rowIndex = 0; rowIndex < numMembers; rowIndex++) { // Copy into our chunkbuffer int ensembleOffset = rowIndex * nColumns * sizeof(float); Buffer.BlockCopy(f.Ensemble, 0, buf, relativeRow * nColumns * sizeof(float), nColumns * sizeof(float)); relativeRow++; // Are we done with this chunk? if (relativeRow == chunkSize) { // HDF5 is threadsafe. But when the compression happens internally, it locks and forces serial behavior. // This matters even if it's one row per chunk, since we can get a speedup by compressing externally h5w.WriteChunkDirect_Threadsafe(valueDset, chunks, buf); // Reset relativeRow = 0; chunks[0] += 1; } else if (rowIndex == numMembers - 1) { // We have some number of rows to write at the end... wipe the rest of the buffer // so it compresses better and gets unzipped as zeroes? for (int i = relativeRow * nColumns; i < buf.Length; i++) { buf[i] = 0; } h5w.WriteChunkDirect_Threadsafe(valueDset, chunks, buf); } } } }); } }