/// <summary>
/// Writes a <see cref="Steno"/> record to the dataset, retrying when the write throws.
/// </summary>
/// <param name="item">Record to store. Its <c>OsobaId</c> may be filled in by this method.</param>
/// <param name="loadOsobaId">
/// When true and the item has no <c>OsobaId</c>, the id is looked up with
/// <c>findInHS</c> from the item's <c>celeJmeno</c> and <c>funkce</c>.
/// </param>
static void SaveItem(Steno item, bool loadOsobaId)
{
    if (string.IsNullOrEmpty(item.OsobaId) && loadOsobaId)
    {
        item.OsobaId = findInHS(item.celeJmeno, item.funkce);
    }

    const int maxAttempts = 300;
    int attempt = 0;
    while (true)
    {
        attempt++;
        try
        {
            dsc.AddOrUpdateItem(item, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.rewrite);
            Console.Write("s"); // lower-case marker: stored
            return;
        }
        catch (Exception ex)
        {
            if (attempt >= maxAttempts)
            {
                // Out of attempts: report the last error and give up on this item.
                Console.WriteLine(ex.Message);
                return;
            }

            Console.Write("S"); // upper-case marker: failed, will retry
            System.Threading.Thread.Sleep(10 * 1000);
        }
    }
}
// Rows older than this date are ignored (previously a fixed date: new DateTime(2020,09,04)).
static DateTime startDt = DateTime.Now.Date.AddDays(-10);

/// <summary>
/// Reads per-region patient counts from an Excel workbook and merges them into the
/// already stored daily <see cref="NemocniceData"/> items (id "id_yyyy-MM-dd").
/// </summary>
/// <param name="fn">Path to the Excel file.</param>
/// <param name="ds">Dataset that holds the daily items; days without a stored item are skipped.</param>
public static void ProcessExcelObsazenost(string fn, HlidacStatu.Api.V2.Dataset.Typed.Dataset<NemocniceData> ds)
{
    Devmasters.Logging.Logger.Root.Info($"ProcessExcelObsazenost {fn} ");
    ExcelPackage.LicenseContext = LicenseContext.NonCommercial;

    using (var p = new ExcelPackage(new System.IO.FileInfo(fn)))
    {
        foreach (var ws in p.Workbook.Worksheets)
        {
            // An empty worksheet has no Dimension; nothing to read there.
            if (ws.Dimension == null)
            {
                continue;
            }

            // Data starts at row 11. Stop at the last used row (the original hard cap
            // of 100000 is kept) instead of probing 100k empty cells per sheet.
            int lastRow = Math.Min(ws.Dimension.End.Row, 99999);
            for (int row = 11; row <= lastRow; row++)
            {
                var dt = ws.Cells[row, 1].GetValue<DateTime?>();
                if (!dt.HasValue || dt.Value < startDt)
                {
                    continue;
                }

                string id = "id_" + dt.Value.ToString("yyyy-MM-dd");
                Console.Write(dt.Value.ToString("yyyy-MM-dd "));

                NemocniceData data = null;
                try
                {
                    data = ds.GetItem(id);
                }
                catch (Exception)
                {
                    // Best effort: a failed lookup is treated the same as "no item for this day".
                }
                if (data == null)
                {
                    continue;
                }

                // Resolve the sheet's region once per row, not once per region in the predicate.
                var regionName = NemocniceData.ExcelWorkBookToRegion(ws.Name);
                var region = data.regions.FirstOrDefault(m => m.region == regionName);
                if (region == null)
                {
                    Console.WriteLine("not found region " + ws.Name);
                    continue;
                }

                region.Pacienti_bezpriznaku = ws.Cells[row, 7].GetValue<int>();
                region.Pacienti_lehky = ws.Cells[row, 8].GetValue<int>();
                region.Pacienti_stredni = ws.Cells[row, 9].GetValue<int>();
                region.Pacienti_tezky = ws.Cells[row, 10].GetValue<int>();
                // Stored value is the difference between this row and the previous row in column 22.
                region.Pacienti_zemreli = ws.Cells[row, 22].GetValue<int>() - ws.Cells[row - 1, 22].GetValue<int>();

                Devmasters.Logging.Logger.Root.Info($"ProcessExcelObsazenost save {ws.Name} - {dt.Value.ToString("yyyy-MM-dd ")} - {region.region} ");
                ds.AddOrUpdateItem(data, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.rewrite);
            }
        }
    }
}
/// <summary>
/// Downloads the list of bridges from bms.clevera.cz, loads the detail of each one in
/// parallel, stores it in <paramref name="ds"/> and returns all stored records.
/// </summary>
/// <param name="ds">Dataset every bridge that has a detail record is written to.</param>
/// <returns>Bridges for which the detail endpoint returned an "o" object.</returns>
static List<Most> DownloadData(HlidacStatu.Api.V2.Dataset.Typed.Dataset<Most> ds)
{
    HttpClient httpClient = new HttpClient();
    httpClient.DefaultRequestHeaders.Accept.Clear();
    httpClient.DefaultRequestHeaders.Accept.Add(new System.Net.Http.Headers.MediaTypeWithQualityHeaderValue("application/json"));
    httpClient.DefaultRequestHeaders.Accept.Add(new System.Net.Http.Headers.MediaTypeWithQualityHeaderValue("text/plain"));
    httpClient.DefaultRequestHeaders.Accept.Add(new System.Net.Http.Headers.MediaTypeWithQualityHeaderValue("*/*"));

    Console.Write($"Reading data from source ....");
    var content = new System.Net.Http.StringContent(
        "{\"bounds\":{\"epsg\":\"5514\",\"esri\":\"10267\",\"xmin\":-991863.6,\"xmax\":-77136.4,\"ymin\":-1255189.7,\"ymax\":-896165},\"layers\":\" Most\",\"zoomIndex\":17}" // whole Czech Republic
        // test area (Mirosovice):
        // "{\"bounds\":{\"epsg\":\"5514\",\"esri\":\"10267\",\"xmin\":-727484.2,\"xmax\":-720530.9,\"ymin\":-1065980.9,\"ymax\":-1063864.3},\"layers\":\"Most Podjezd\",\"zoomIndex\":14}"
        , System.Text.Encoding.UTF8, "application/json");

    var jsonResult = httpClient.PostAsync("http://bms.clevera.cz/api/assetregistermap/GetMapAllObjects?t=1&d=0", content)
        .Result.Content
        .ReadAsStringAsync().Result;
    Console.WriteLine($"Done.");

    var data = Newtonsoft.Json.Linq.JObject.Parse(jsonResult);

    List<Most> mosty = new List<Most>();
    // List<T> is not safe for concurrent writers; all Adds from the parallel loop go through this gate.
    object mostyGate = new object();

    // Prefixes of the textual state description; index + 1 is the numeric state.
    string[] stavPrefixes = { "I ", "II ", "III ", "IV ", "V ", "VI ", "VII ", "VIII " };

    int count = 0;
    int total = data["MapObjects"].Count();
    ParallelOptions po = new ParallelOptions() { MaxDegreeOfParallelism = 5 };

    Parallel.ForEach<JToken>(data["MapObjects"], po, jo =>
    {
        var c = System.Threading.Interlocked.Increment(ref count);
        if (c % 20 == 0)
        {
            // Print the value this thread obtained, not the shared counter other threads keep changing.
            Console.WriteLine($"{c} z {total}");
        }

        Most m = new Most();
        m.Id = jo["g"].Value<string>();
        var gps = Geo.JTSK.ToWgs(
            Math.Abs(jo["y"].Value<double>()),
            Math.Abs(jo["x"].Value<double>())
        );
        m.GPS_Lat = gps.getLatitude();
        m.GPS_Lng = gps.getLongitude();

        var dataMost = httpClient.GetStringAsync("http://bms.clevera.cz/api/assetregistermap/GetMapObjekt?g=" + m.Id).Result;
        var jsonMost = Newtonsoft.Json.Linq.JObject.Parse(dataMost);
        var o = jsonMost["o"];
        if (o == null)
        {
            // No detail object: the bridge is neither returned nor stored.
            return;
        }

        m.Jmeno = o["n"].Value<string>();
        m.MistniNazev = o["m"].Value<string>();
        m.Oznaceni = o["c"].Value<string>();

        // "sl" holds up to three parts separated by '|': organisation | centre | operating section.
        string[] spravce = o["sl"].Value<string>().Split('|');
        m.SpravaOrganizace = spravce[0];
        if (spravce.Length > 1)
        {
            m.SpravaStredisko = spravce[1];
            if (spravce.Length > 2)
            {
                m.SpravaProvozniUsek = spravce[2];
            }
        }

        m.PopisStavu = o["s"].Value<string>()?.Trim() ?? "";
        for (int i = 0; i < stavPrefixes.Length; i++)
        {
            // The trailing space in each prefix keeps "I " from matching "II ..." etc.
            if (m.PopisStavu.StartsWith(stavPrefixes[i]))
            {
                m.Stav = i + 1;
                break;
            }
        }

        m.ProhlidkaPopis = o["p"].Value<string>()?.Trim() ?? "";
        var dat = GetRegexGroupValue(m.ProhlidkaPopis, @"(?<dat>\d{2}\.\d{2}\.\d{4})", "dat");
        if (!string.IsNullOrEmpty(dat))
        {
            if (DateTime.TryParseExact(dat, "dd.MM.yyyy", System.Globalization.CultureInfo.GetCultureInfo("cs"), System.Globalization.DateTimeStyles.AssumeLocal, out var datum))
            {
                m.PosledniProhlidka = datum;
            }
        }

        lock (mostyGate)
        {
            mosty.Add(m);
        }
        ds.AddOrUpdateItem(m, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.rewrite);
    });

    return mosty;
}
/// <summary>
/// Entry point of the "Jednání Rady ČT" importer: reads the command line, opens (or creates)
/// the dataset, collects the list of meetings from the paged web listing and then parses
/// and stores every meeting that is new (or all of them with /rewrite).
/// </summary>
/// <param name="arguments">
/// Command line. /mp3path and /apikey are mandatory; /utdl, /rewrite, /daysback, /ids,
/// /skips2t and /t are read when present.
/// </param>
static void Main(string[] arguments)
{
    Console.WriteLine($"Jednání-Rady-ČT - {System.Reflection.Assembly.GetEntryAssembly().GetName().Version}");
    Devmasters.Logging.Logger.Root.Info($"Jednání-Rady-ČT - {System.Reflection.Assembly.GetEntryAssembly().GetName().Version}");
    Devmasters.Logging.Logger.Root.Debug("Jednání Rady ČT starting with " + string.Join(',', arguments));

    var args = new Devmasters.Args(arguments, new string[] { "/mp3path", "/apikey" });
    if (args.MandatoryPresent() == false)
    {
        Help();
    }

    mp3path = args.Get("/mp3path", null);

    string exeDir = System.IO.Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location);
    if (args.Exists("/utdl"))
    {
        YTDL = args["/utdl"];
    }
    else
    {
        YTDL = exeDir + "\\youtube-dl.exe";
    }
    startPath = exeDir;

    apiKey = args["/apikey"];
    rewrite = args.Exists("/rewrite");
    afterDay = DateTime.Now.Date.AddDays(-1 * args.GetNumber("/daysback", 10000).Value);
    if (args.Exists("/ids"))
    {
        ids = args.GetArray("/ids");
    }
    skips2t = args.Exists("/skips2t");
    int threads = args.GetNumber("/t") ?? 5;

    try
    {
        ds = HlidacStatu.Api.V2.Dataset.Typed.Dataset<Jednani>.OpenDataset(apiKey, DataSetId);
    }
    catch (ApiException)
    {
        // Opening failed with an API error: create the dataset from its registration instead.
        ds = HlidacStatu.Api.V2.Dataset.Typed.Dataset<Jednani>.CreateDataset(apiKey, Registration());
    }

    string nextPages = "https://www.ceskatelevize.cz/ivysilani/10000000064-jednani-rady-ceske-televize/dalsi-casti/{0}";
    int page = 0;
    List<Jednani> jednani = new List<Jednani>();

    // Walk the listing page by page until a page yields no items.
    while (true)
    {
        page++;
        using (Devmasters.Net.HttpClient.URLContent net = new Devmasters.Net.HttpClient.URLContent(string.Format(nextPages, page)))
        {
            Console.WriteLine($"Page {page}");
            net.IgnoreHttpErrors = true;
            net.Tries = 5;
            net.TimeInMsBetweenTries = 2000;

            string html = "";
            try
            {
                Devmasters.Logging.Logger.Root.Debug($"downloading {net.Url} ");
                html = net.GetContent().Text;
            }
            catch (Exception e)
            {
                // A failed download leaves html empty, which ends the paging below.
                Devmasters.Logging.Logger.Root.Error($"{net.Url} failed", e);
            }

            Devmasters.XPath xp = new Devmasters.XPath(html);
            var links = xp.GetNodes("//li[contains(@class,'itemBlock')]");
            if (links == null || links.Count == 0)
            {
                break;
            }

            foreach (var link in links)
            {
                Jednani j = new Jednani();
                j.Odkaz = urlPrefix + Devmasters.XPath.Tools.GetNodeAttributeValue(link, "div/h3/a[@class='itemSetPaging']", "href");
                j.Titulek = Devmasters.XPath.Tools.GetNodeText(link, "div/h3/a[@class='itemSetPaging']").Trim();
                j.DatumJednani = Devmasters.DT.Util.ToDate(Devmasters.XPath.Tools.GetNodeText(link, "div/p").Trim()) ?? DateTime.MinValue;
                j.Id = Devmasters.RegexUtil.GetRegexGroupValue(j.Odkaz, "/ivysilani/10000000064-jednani-rady-ceske-televize/(?<id>\\d{2,})", "id");

                if (j.DatumJednani > afterDay && (ids == null || ids.Contains(j.Id)))
                {
                    jednani.Add(j);
                }
            }
        }
    }

    Devmasters.Logging.Logger.Root.Debug($"Starting {jednani.Count} items ");

    Devmasters.Batch.Manager.DoActionForAll<string>(jednani.Select(m => m.Id).Reverse(),
        id =>
        {
            var output = new Devmasters.Batch.ActionOutputData() { Log = id };

            // An item whose id could not be extracted can be neither looked up nor stored;
            // skip it before touching the API.
            if (string.IsNullOrEmpty(id))
            {
                return output;
            }

            bool exists = ds.ItemExists(id);
            if (!exists || rewrite)
            {
                Devmasters.Logging.Logger.Root.Debug($"Start parsing {id} ");
                var fullJ = ParseJednani(jednani.First(m => m.Id == id));
                Devmasters.Logging.Logger.Root.Debug($"Saving {id} ");
                ds.AddOrUpdateItem(fullJ, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.rewrite);
            }
            else
            {
                // Already stored and not rewriting: only add the audio transcript if it is still missing.
                var fullJ = ds.GetItemSafe(id);
                if (fullJ != null && !(fullJ.PrepisAudia?.Count() > 0))
                {
                    Devmasters.Logging.Logger.Root.Debug($"Checking AUDIO text {id} ");
                    var aud = Audio(fullJ);
                    if (aud?.Count() > 0)
                    {
                        fullJ.PrepisAudia = aud;
                        ds.AddOrUpdateItem(fullJ, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.rewrite);
                    }
                }
            }

            return output;
        }, true, maxDegreeOfParallelism: threads);
}
/// <summary>
/// Makes sure <c>{name}.xml</c> is available locally (downloading it when it is missing,
/// forced, or older than four days), deserializes it and stores every subject that has
/// beneficial owners and is new, forced, or changed compared to the stored item.
/// </summary>
/// <param name="args">Command line arguments; "/uselocal" keeps an existing local file as is.</param>
/// <param name="name">File name without the ".xml" extension.</param>
private static void ProcessXML(Devmasters.Args args, string name)
{
    logger.Debug($"Starting {name}.xml");

    string xmlFile = name + ".xml";
    if (!System.IO.File.Exists(xmlFile))
    {
        logger.Debug($"downloading new {name}.xml");
        Console.WriteLine($"Downloading {name}");
        DownloadFile(name);
    }
    else if (!args.Exists("/uselocal")
        && (force || (DateTime.Now - new System.IO.FileInfo(xmlFile).LastWriteTime).TotalDays > 4))
    {
        logger.Debug($"downloading new {name}.xml");
        Console.WriteLine($"Downloading new {name}");
        DownloadFile(name);
    }

    if (!System.IO.File.Exists(xmlFile))
    {
        return;
    }

    Console.WriteLine($"Deserializing {name}");
    logger.Debug($"Deserializing {name}.xml");
    rawXML d;
    using (var xmlReader = new System.IO.StreamReader(xmlFile))
    {
        d = (rawXML)new XmlSerializer(typeof(rawXML)).Deserialize(xmlReader);
    }
    Console.WriteLine($"{d.Subjekt?.Count()} subjects");

    Devmasters.Batch.Manager.DoActionForAll<xmlSubjekt>(d.Subjekt
        //.Where(m=>m.ico== "3493661") //debug
        , subj =>
        {
            majitele item = majitele.GetMajitele(subj);

            // Subjects without any beneficial owner are not stored.
            if (!(item?.skutecni_majitele?.Count() > 0))
            {
                return new Devmasters.Batch.ActionOutputData();
            }

            bool save;
            if (!ds.ItemExists(item.ico) || force)
            {
                save = true;
            }
            else
            {
                var old = ds.GetItem(item.ico);
                if (old == null)
                {
                    save = false;
                }
                else if (old.skutecni_majitele?.Count() != item.skutecni_majitele?.Count())
                {
                    save = true;
                }
                else
                {
                    // Unchanged only when every current owner has a stored counterpart with
                    // identical fields AND that stored counterpart already has an osobaId.
                    bool same = item.skutecni_majitele.All(sm =>
                        old.skutecni_majitele.Any(m =>
                            m.osoba_jmeno == sm.osoba_jmeno
                            && m.osoba_prijmeni == sm.osoba_prijmeni
                            && m.osoba_datum_narozeni == sm.osoba_datum_narozeni
                            && m.osoba_titul_pred == sm.osoba_titul_pred
                            && m.osoba_titul_za == sm.osoba_titul_za
                            && m.adresa_cast_obce == sm.adresa_cast_obce
                            && m.adresa_cislo_ev == sm.adresa_cislo_ev
                            && m.adresa_cislo_or == sm.adresa_cislo_or
                            && m.adresa_cislo_po == sm.adresa_cislo_po
                            && m.adresa_obec == sm.adresa_obec
                            && m.adresa_okres == sm.adresa_okres
                            && m.adresa_psc == sm.adresa_psc
                            && m.adresa_stat_nazev == sm.adresa_stat_nazev
                            && m.adresa_text == sm.adresa_text
                            && m.adresa_ulice == sm.adresa_ulice
                            && m.slovni_vyjadreni == sm.slovni_vyjadreni
                            && m.podil == sm.podil
                            && m.postaveni == sm.postaveni
                            && !string.IsNullOrEmpty(m.osobaId)));
                    save = !same;
                }
            }

            if (save)
            {
                item.UpdateOsobaId();
                ds.AddOrUpdateItem(item, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.rewrite);
            }

            return new Devmasters.Batch.ActionOutputData();
        },
        Devmasters.Batch.Manager.DefaultOutputWriter,
        Devmasters.Batch.Manager.DefaultProgressWriter,
        !System.Diagnostics.Debugger.IsAttached,
        maxDegreeOfParallelism: 4,
        prefix: $"{name} ITEMS ");
}
/// <summary>
/// Processes the YouTube videos of one person: resolves the list of videos, downloads the
/// audio of each one when it is not on disk yet, requests a speech-to-text task when no
/// transcript exists, attaches a finished transcript, and stores changed records.
/// </summary>
/// <param name="o">Person the videos belong to; its NameId is written to new records.</param>
/// <param name="playlist">Playlist passed to youtube-dl when <paramref name="vids"/> is empty.</param>
/// <param name="threads">Degree of parallelism for the per-video work.</param>
/// <param name="max">Value of youtube-dl's --playlist-end when listing the playlist.</param>
/// <param name="vids">Explicit video ids; when non-empty the playlist is not queried.</param>
/// <param name="mp3path">Root folder for audio files and transcripts.</param>
public static void Process(osoba o, string playlist, int threads, int max, string[] vids, string mp3path)
{
    logger.Info($"Starting {o.Jmeno} {o.Prijmeni} {o.NameId} for {playlist} ");

    List<string> videos;
    if (vids?.Count() > 0)
    {
        videos = vids
            .Select(m => "https://www.youtube.com/watch?v=" + m)
            .ToList();
    }
    else
    {
        // No explicit ids: let youtube-dl print the ids of the playlist, one per line.
        var listInfo = new System.Diagnostics.ProcessStartInfo("youtube-dl",
            $"--flat-playlist --get-id --playlist-end {max} " + playlist);
        var lister = new Devmasters.ProcessExecutor(listInfo, 60 * 6 * 24);
        logger.Info($"Starting Youtube-dl playlist video list ");
        lister.Start();

        string[] idLines = lister.StandardOutput
            .Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
        videos = idLines
            .Select(m => "https://www.youtube.com/watch?v=" + m)
            .ToList();
    }

    Console.WriteLine();
    Console.WriteLine($"Processing {videos.Count} videos");
    Console.WriteLine();
    Console.WriteLine();
    Console.WriteLine();
    Console.WriteLine();

    Devmasters.Batch.Manager.DoActionForAll(videos,
        vid =>
        {
            string uniqId = record.UniqueID(vid);
            record rec;
            bool changed = false;

            if (Program.api2.ItemExists(uniqId))
            {
                rec = Program.api2.GetItem(uniqId);
            }
            else
            {
                rec = YTDL.GetVideoInfo(vid);
                if (rec == null)
                {
                    return new Devmasters.Batch.ActionOutputData();
                }
                rec.osobaid = o.NameId;
                changed = true;
            }

            string recId = uniqId;
            string fnFile = $"{mp3path}\\{DataSetId}\\{recId}";
            var MP3Fn = $"{fnFile}.mp3";
            var newtonFn = $"{fnFile}.mp3.raw_s2t";
            var dockerFn = $"{fnFile}.ctm";

            // Download the audio only when the mp3 is not on disk yet.
            if (!System.IO.File.Exists(MP3Fn))
            {
                var dlInfo = new System.Diagnostics.ProcessStartInfo("youtube-dl.exe",
                    $"--no-progress --extract-audio --audio-format mp3 --postprocessor-args \" -ac 1 -ar 16000\" -o \"{fnFile}.%(ext)s\" " + vid);
                var downloader = new Devmasters.ProcessExecutor(dlInfo, 60 * 6 * 24);
                downloader.StandardOutputDataReceived += (ox, e) => { logger.Debug(e.Data); };
                logger.Info($"Starting Youtube-dl for {vid} ");
                downloader.Start();
            }

            bool exists_S2T = System.IO.File.Exists(newtonFn) || System.IO.File.Exists(dockerFn);

            if (!exists_S2T)
            {
                // No transcript file and none stored on the record: queue a speech-to-text task.
                if (rec.prepisAudia == null)
                {
                    using (var net = new Devmasters.Net.HttpClient.URLContent(
                        $"https://www.hlidacstatu.cz/api/v2/internalq/Voice2TextNewTask/{DataSetId}/{recId}?priority=2"))
                    {
                        net.Method = Devmasters.Net.HttpClient.MethodEnum.POST;
                        net.RequestParams.Headers.Add("Authorization", System.Configuration.ConfigurationManager.AppSettings["apikey"]);
                        net.GetContent();
                    }
                }
            }
            else if (!(rec.prepisAudia?.Count() > 0) && System.IO.File.Exists(dockerFn))
            {
                // A .ctm transcript exists but the record has no transcript blocks yet.
                var tt = new KaldiASR.SpeechToText.VoiceToTerms(System.IO.File.ReadAllText(dockerFn));
                var blocks = new Devmasters.SpeechToText.VoiceToTextFormatter(tt.Terms)
                    .TextWithTimestamps(TimeSpan.FromSeconds(10), true)
                    .Select(t => new record.Blok() { sekundOdZacatku = (long)t.Start.TotalSeconds, text = t.Text })
                    .ToArray();

                // TODO: fix this eventually
                var tmpRec = YTDL.GetVideoInfo(vid);
                if (tmpRec != null)
                {
                    rec.text = tmpRec.text + "\n\n" + new Devmasters.SpeechToText.VoiceToTextFormatter(tt.Terms).Text(true);
                }
                rec.prepisAudia = blocks;
                changed = true;
            }

            if (changed)
            {
                api2.AddOrUpdateItem(rec, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.rewrite);
            }

            return new Devmasters.Batch.ActionOutputData();
        },
        Devmasters.Batch.Manager.DefaultOutputWriter,
        Devmasters.Batch.Manager.DefaultProgressWriter,
        !System.Diagnostics.Debugger.IsAttached,
        maxDegreeOfParallelism: threads);
}
/// <summary>
/// Downloads and parses UOHS decision pages (detail-{i}.html) for a range of page numbers
/// and stores each parsed decision in the dataset. Errors on a single page are logged and
/// do not stop the run.
/// </summary>
/// <param name="datasetId">Not used by this method.</param>
/// <param name="startFrom">First page number to process.</param>
/// <param name="count">How many consecutive page numbers to process.</param>
public static void ParsePages(string datasetId, int startFrom = 10000, int count = 600)
{
    Devmasters.Batch.Manager.DoActionForAll<int>(Enumerable.Range(startFrom, count),
        // Original author's note: run in 2 threads, be considerate and do not use more.
        // NOTE(review): no degree of parallelism is passed here - confirm the library default.
        (i) =>
        {
            string url = "";
            try
            {
                // Each decision lives on its own page with a simple URL whose number keeps
                // growing (per the original note, about 15500 as of 2018-09-01).
                url = $"http://www.uohs.cz/cs/verejne-zakazky/sbirky-rozhodnuti/detail-{i}.html";

                string html;
                using (System.Net.WebClient wc = new System.Net.WebClient())
                {
                    wc.Encoding = System.Text.Encoding.UTF8;
                    html = wc.DownloadString(url);
                }

                // Devmasters.XPath wraps the HTML so it can be queried with XPath expressions.
                Devmasters.XPath page = new Devmasters.XPath(html);

                // The page does not exist: skip it.
                if (page.GetNodeText("//head/title")?.Contains("stránka neexistuje") == true)
                {
                    return new Devmasters.Batch.ActionOutputData();
                }

                logger.Debug($"parsing {url}");

                var item = new UOHSData();
                item.Url = url;
                item.Id = i.ToString();

                // All content is inside this DIV.
                var root = "//div[@id='content']";
                // XPath of the value cell in the detail table row whose header contains the given label.
                Func<string, string> detailCell = label =>
                    root + "//table[@id='resolution_detail']//tr//th[contains(text(),'" + label + "')]/parent::tr/td";

                item.Cj = page.GetNodeText(root + "//div/h1/strong[1]")?.Replace("Rozhodnutí: ", "");
                item.SpisovaZnacka = page.GetNodeText(root + "//div/h1/strong[2]")?.Replace("Rozhodnutí: ", "");
                item.SoudniRozhodnuti = page.GetNodeText(root + "//div//h1/following-sibling::h2[1]");
                item.Instance = page.GetNodeText(detailCell("Instance"));
                item.Vec = page.GetNodeText(detailCell("Věc"));

                var ucastniciNode = page.GetNodes(detailCell("Účastníci") + "/ol/li");
                List<UOHSData.Ucastnik> ucastnici = new List<UOHSData.Ucastnik>();
                if (ucastniciNode != null)
                {
                    foreach (var node in ucastniciNode)
                    {
                        // Convert HTML entities to plain characters.
                        var firmaJmeno = System.Net.WebUtility.HtmlDecode(node.InnerText);

                        // Look up the company ICO by name.
                        var ico = httpClient.GetAsync("https://www.hlidacstatu.cz/api/v2/firmy/" + System.Net.WebUtility.UrlEncode(firmaJmeno))
                            .Result.Content
                            .ReadAsStringAsync().Result;
                        try
                        {
                            var icoRes = Newtonsoft.Json.Linq.JObject.Parse(ico);
                            if (icoRes["ico"] == null)
                            {
                                ucastnici.Add(new UOHSData.Ucastnik() { Jmeno = firmaJmeno });
                            }
                            else
                            {
                                ucastnici.Add(new UOHSData.Ucastnik() { Jmeno = firmaJmeno, ICO = icoRes["ico"].Value<string>() });
                            }
                        }
                        catch (Exception)
                        {
                            // Response could not be read as JSON with an ICO: keep the participant by name only.
                            ucastnici.Add(new UOHSData.Ucastnik() { Jmeno = firmaJmeno });
                        }
                    }
                }
                item.Ucastnici = ucastnici.ToArray();

                item.Typ_spravniho_rizeni = page.GetNodeText(detailCell("Typ správního řízení"));
                item.Typ_rozhodnuti = page.GetNodeText(detailCell("Typ rozhodnutí"));
                item.Rok = page.GetNodeText(detailCell("Rok"));
                item.PravniMoc = ToDateTimeFromCZ(page.GetNodeText(detailCell("Datum nabytí právní moci")));

                var souvis_urls = page.GetNodes(detailCell("Související rozhodnutí") + "/a");
                if (souvis_urls != null)
                {
                    item.SouvisejiciUrl = souvis_urls
                        .Select(m => m.Attributes["href"]?.Value)
                        .Where(m => m != null)
                        .Select(u => "http://www.uohs.cz" + u)
                        .ToArray();
                }

                item.Rozhodnuti = new UOHSData.Dokument();
                item.Rozhodnuti.Url = page.GetNode(detailCell("Dokumenty") + "/a")
                    ?.Attributes["href"]?.Value;
                if (!string.IsNullOrEmpty(item.Rozhodnuti.Url))
                {
                    // Make the document link absolute. The previous code appended the
                    // SouvisejiciUrl array here, producing "http://www.uohs.czSystem.String[]".
                    item.Rozhodnuti.Url = "http://www.uohs.cz" + item.Rozhodnuti.Url;
                }
                item.Rozhodnuti.PlainText = page.GetNode("//div[@id='content']//div[@class='res_text']")?.InnerText ?? "";

                // Parsing done, store the record in the dataset.
                logger.Debug($"adding item {item.Id} - {item.Url}");
                ds.AddOrUpdateItem(item, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.rewrite);
            }
            catch (Exception e)
            {
                logger.Error(url, e);
            }

            return new Devmasters.Batch.ActionOutputData();
        },
        outputWriter.OutputWriter,
        progressWriter.ProgressWriter,
        !System.Diagnostics.Debugger.IsAttached);
}
/// <summary>
/// Stores <paramref name="item"/> through <c>Connector</c> in rewrite mode.
/// </summary>
/// <param name="item">Record to add or overwrite.</param>
/// <returns>A task whose result is the value returned by <c>Connector.AddOrUpdateItem</c>.</returns>
/// <remarks>
/// The method contains no <c>await</c>, so the call runs synchronously on the caller's
/// thread; an exception from the connector is delivered through the returned task.
/// </remarks>
public async Task<string> Add(Trest item)
    => Connector.AddOrUpdateItem(item, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.rewrite);
/// <summary>
/// Entry point: downloads the intensive-care capacity CSV from dip.mzcr.cz, reads the
/// records of the last 30 days and adds those not yet present to the dataset.
/// </summary>
/// <param name="args">
/// Command line in "key=value" form. Keys are lower-cased; a key without '=' gets an empty
/// value. The resulting dictionary is passed to <c>CreateDataset</c>.
/// </param>
static void Main(string[] args)
{
    // Split on the first '=' only, so values that themselves contain '=' stay intact.
    var DArgs = args
        .Select(m => m.Split(new[] { '=' }, 2))
        .ToDictionary(m => m[0].ToLowerInvariant(), v => v.Length == 1 ? "" : v[1]);

    CreateDataset(DArgs);

    string openDataPage = "https://dip.mzcr.cz/api/v1/kapacity-intenzivni-pece-zdravotnicke-zarizeni.csv";
    DateTime fromD = DateTime.Now.Date.AddDays(-30);

    Devmasters.Logging.Logger.Root.Info("Getting URL of csv from " + openDataPage);
    using (Devmasters.Net.HttpClient.URLContent net = new Devmasters.Net.HttpClient.URLContent(openDataPage))
    {
        Devmasters.Logging.Logger.Root.Info("Getting csv");
        var fn = "kapacity-intenzivni-pece-zdravotnicke-zarizeni.csv";
        System.IO.File.WriteAllBytes(fn, net.GetBinary().Binary);

        using (var reader = new StreamReader(fn))
        using (var csv = new CsvReader(reader, new CsvHelper.Configuration.CsvConfiguration(System.Globalization.CultureInfo.InvariantCulture)
        {
            Delimiter = ",",
            IgnoreBlankLines = true,
            HasHeaderRecord = true,
            TrimOptions = CsvHelper.Configuration.TrimOptions.Trim
        }))
        {
            List<Nemocnice> recs = new List<Nemocnice>();

            csv.Read();
            csv.ReadHeader();
            while (csv.Read())
            {
                var record = new Nemocnice
                {
                    crrt_kapacita_celkem = csv.GetField<int>("crrt_kapacita_celkem"),
                    crrt_kapacita_volna = csv.GetField<int>("crrt_kapacita_volna"),
                    ecmo_kapacita_celkem = csv.GetField<int>("ecmo_kapacita_celkem"),
                    ecmo_kapacita_volna = csv.GetField<int>("ecmo_kapacita_volna"),
                    ihd_kapacita_celkem = csv.GetField<int>("ihd_kapacita_celkem"),
                    ihd_kapacita_volna = csv.GetField<int>("ihd_kapacita_volna"),
                    kraj_nazev = csv.GetField<string>("kraj_nazev"),
                    kraj_nuts_kod = csv.GetField<string>("kraj_nuts_kod"),
                    luzka_aro_jip_kapacita_celkem = csv.GetField<int>("luzka_aro_jip_kapacita_celkem"),
                    luzka_aro_jip_kapacita_volna_covid_negativni = csv.GetField<int>("luzka_aro_jip_kapacita_volna_covid_negativni"),
                    luzka_aro_jip_kapacita_volna_covid_pozitivni = csv.GetField<int>("luzka_aro_jip_kapacita_volna_covid_pozitivni"),
                    luzka_standard_kyslik_kapacita_celkem = csv.GetField<int>("luzka_standard_kyslik_kapacita_celkem"),
                    luzka_standard_kyslik_kapacita_volna_covid_negativni = csv.GetField<int>("luzka_standard_kyslik_kapacita_volna_covid_negativni"),
                    luzka_standard_kyslik_kapacita_volna_covid_pozitivni = csv.GetField<int>("luzka_standard_kyslik_kapacita_volna_covid_pozitivni"),
                    // The "reprofilizovana" columns are read as nullable and default to 0.
                    reprofilizovana_kapacita_luzka_aro_jip_kapacita_celkem = csv.GetField<int?>("reprofilizovana_kapacita_luzka_aro_jip_kapacita_celkem") ?? 0,
                    reprofilizovana_kapacita_luzka_aro_jip_kapacita_planovana = csv.GetField<int?>("reprofilizovana_kapacita_luzka_aro_jip_kapacita_planovana") ?? 0,
                    reprofilizovana_kapacita_luzka_aro_jip_kapacita_volna = csv.GetField<int?>("reprofilizovana_kapacita_luzka_aro_jip_kapacita_volna") ?? 0,
                    reprofilizovana_kapacita_luzka_standard_kyslik_kapacita_celkem = csv.GetField<int?>("reprofilizovana_kapacita_luzka_standard_kyslik_kapacita_celkem") ?? 0,
                    reprofilizovana_kapacita_luzka_standard_kyslik_kapacita_planovana = csv.GetField<int?>("reprofilizovana_kapacita_luzka_standard_kyslik_kapacita_planovana") ?? 0,
                    reprofilizovana_kapacita_luzka_standard_kyslik_kapacita_volna = csv.GetField<int?>("reprofilizovana_kapacita_luzka_standard_kyslik_kapacita_volna") ?? 0,
                    upv_kapacita_celkem = csv.GetField<int>("upv_kapacita_celkem"),
                    upv_kapacita_volna = csv.GetField<int>("upv_kapacita_volna"),
                    ventilatory_operacni_sal_kapacita_celkem = csv.GetField<int>("ventilatory_operacni_sal_kapacita_celkem"),
                    ventilatory_operacni_sal_kapacita_volna = csv.GetField<int>("ventilatory_operacni_sal_kapacita_volna"),
                    ventilatory_prenosne_kapacita_celkem = csv.GetField<int>("ventilatory_prenosne_kapacita_celkem"),
                    ventilatory_prenosne_kapacita_volna = csv.GetField<int>("ventilatory_prenosne_kapacita_volna"),
                    zz_kod = csv.GetField<string>("zz_kod"),
                    zz_nazev = csv.GetField<string>("zz_nazev"),
                    // Machine-readable ISO date: parse culture-independently so the result
                    // does not depend on the calendar of the machine's current culture.
                    datum = DateTime.ParseExact(csv.GetField<string>("datum"), "yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture)
                };

                if (record.datum > fromD)
                {
                    recs.Add(record);
                }
            }

            // Newest first; ItemInsertMode.skip is used so existing items are not rewritten.
            Devmasters.Batch.Manager.DoActionForAll<Nemocnice>(recs.OrderByDescending(o => o.datum),
                n =>
                {
                    Console.Write(".");
                    ds.AddOrUpdateItem(n, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.skip);
                    return new Devmasters.Batch.ActionOutputData();
                }, false);
        }
    }
}
/// <summary>
/// Reads the per-facility intensive-care capacity CSV from dip.mzcr.cz, aggregates the
/// rows of the last 120 days per day and region, and stores one <see cref="NemocniceData"/>
/// item per day (id "id_yyyy-MM-dd"), reusing an already stored item when it can be loaded.
/// </summary>
private static void OpenDataDIP()
{
    List<NemocniceData.Region> raw = new List<NemocniceData.Region>();

    DateTime mindate = DateTime.Now.Date.AddDays(-120);

    // Dispose the client, the stream reader and the CSV reader; previously only the Task
    // returned by GetStreamAsync was wrapped in the using statement.
    using (var http = new System.Net.Http.HttpClient())
    using (System.IO.StreamReader rr = new StreamReader(http.GetStreamAsync("https://dip.mzcr.cz/api/v1/kapacity-intenzivni-pece-zdravotnicke-zarizeni-04-2021.csv").Result))
    using (var csv = new CsvHelper.CsvReader(rr, new CsvHelper.Configuration.CsvConfiguration(System.Globalization.CultureInfo.GetCultureInfo("cs")) { HasHeaderRecord = true, Delimiter = "," }))
    {
        // Reads a column as text and converts it with ConvertToInt, passing 0 as its second argument.
        Func<string, int> number = column =>
            Devmasters.TextUtil.ConvertToInt(csv.GetField<string>(column), 0).Value;

        csv.Read();
        csv.ReadHeader();
        while (csv.Read())
        {
            DateTime? date = Devmasters.DT.Util.ParseDateTime(csv.GetField<string>("datum")?.Trim(), null);
            if (date == null)
            {
                continue;
            }
            DateTime dt = date.Value;
            if (dt < mindate)
            {
                continue;
            }
            Console.WriteLine(".");

            string kraj_nuts_kod = csv.GetField<string>("kraj_nuts_kod");
            string region = Kraje[kraj_nuts_kod];

            var r = new NemocniceData.Region();
            r.lastModified = dt;
            r.region = region;
            r.name = csv.GetField<string>("zz_nazev");
            // UPV and IHD are not read from this file and are stored as 0.
            r.UPV_celkem = 0;
            r.UPV_volna = 0;
            r.ECMO_celkem = number("ecmo_kapacita_celkem");
            r.ECMO_volna = number("ecmo_kapacita_volna");
            r.CRRT_celkem = number("cvvhd_kapacita_celkem");
            r.CRRT_volna = number("cvvhd_kapacita_volna");
            r.IHD_celkem = 0;
            r.IHD_volna = 0;
            r.AROJIP_luzka_celkem = number("luzka_upv_niv_kapacita_celkem");
            r.AROJIP_luzka_covid = number("luzka_upv_niv_kapacita_volna_covid_pozitivni");
            r.AROJIP_luzka_necovid = number("luzka_upv_niv_kapacita_volna_covid_negativni");
            r.Standard_luzka_s_kyslikem_celkem = number("luzka_standard_kyslik_kapacita_celkem");
            r.Standard_luzka_s_kyslikem_covid = number("luzka_standard_kyslik_kapacita_volna_covid_pozitivni");
            r.Standard_luzka_s_kyslikem_necovid = number("luzka_standard_kyslik_kapacita_volna_covid_negativni");
            r.Ventilatory_prenosne_celkem = number("ventilatory_prenosne_kapacita_celkem");
            r.Ventilatory_operacnisal_celkem = number("ventilatory_operacni_sal_kapacita_celkem");
            raw.Add(r);
        }
    }

    Devmasters.Logging.Logger.Root.Info("Saving");

    // Materialize once; the deferred queries were re-evaluated over "raw" on every loop pass.
    var dates = raw.Select(m => m.lastModified).Distinct().Where(m => m > mindate).ToList();
    var kraje = raw.Select(m => m.region).Distinct().ToList();

    foreach (var dt in dates)
    {
        Console.WriteLine(dt);
        var id = "id_" + dt.ToString("yyyy-MM-dd");

        List<NemocniceData.Region> nr = new List<NemocniceData.Region>();
        foreach (var kr in kraje)
        {
            var lines = raw.Where(m => m.lastModified == dt && m.region == kr).ToArray();
            var r = NemocniceData.Aggregate(lines);
            r.region = kr;
            nr.Add(r);
        }

        NemocniceData nem = null;
        try
        {
            nem = ds.GetItem(id);
        }
        catch (Exception)
        {
            // Best effort: a failed lookup is treated as "not stored yet".
        }
        if (nem == null)
        {
            nem = new NemocniceData();
        }

        nem.id = id;
        nem.regions = nr;
        nem.lastUpdated = dt;
        ds.AddOrUpdateItem(nem, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.rewrite);
    }
}