/// <summary>
/// Makes sure <c>{name}.xml</c> is available locally (downloading it when needed),
/// deserializes it into <c>rawXML</c> and writes every subject that has at least one
/// <c>skutecni_majitele</c> entry into the dataset when the record is new, forced,
/// or differs from the copy already stored.
/// </summary>
/// <param name="args">Parsed command line; only the <c>/uselocal</c> switch is read here.</param>
/// <param name="name">File name without the <c>.xml</c> extension; also handed to <c>DownloadFile</c>.</param>
private static void ProcessXML(Devmasters.Args args, string name)
{
    string xmlPath = name + ".xml";
    logger.Debug($"Starting {name}.xml");

    if (!System.IO.File.Exists(xmlPath))
    {
        // No local copy at all: always download.
        logger.Debug($"downloading new {name}.xml");
        Console.WriteLine($"Downloading {name}");
        DownloadFile(name);
    }
    else if (!args.Exists("/uselocal"))
    {
        // A local copy exists and /uselocal was not given:
        // refresh only when forced or when the file is older than four days.
        if (force || (DateTime.Now - new System.IO.FileInfo(xmlPath).LastWriteTime).TotalDays > 4)
        {
            logger.Debug($"downloading new {name}.xml");
            Console.WriteLine($"Downloading new {name}");
            DownloadFile(name);
        }
    }

    if (!System.IO.File.Exists(xmlPath))
    {
        // Still no file after the download attempt; nothing to process.
        return;
    }

    Console.WriteLine($"Deserializing {name}");
    logger.Debug($"Deserializing {name}.xml");

    rawXML parsed;
    using (var input = new System.IO.StreamReader(xmlPath))
    {
        parsed = (rawXML)new XmlSerializer(typeof(rawXML)).Deserialize(input);
    }

    Console.WriteLine($"{parsed.Subjekt?.Count()} subjects");

    Devmasters.Batch.Manager.DoActionForAll<xmlSubjekt>(
        parsed.Subjekt,
        subj =>
        {
            majitele current = majitele.GetMajitele(subj);

            // Subjects without any owner entries are ignored entirely.
            if (current == null || !(current.skutecni_majitele?.Count() > 0))
            {
                return new Devmasters.Batch.ActionOutputData();
            }

            bool mustWrite;
            if (!ds.ItemExists(current.ico) || force)
            {
                mustWrite = true;
            }
            else
            {
                var stored = ds.GetItem(current.ico);
                if (stored == null)
                {
                    // The item is reported as existing but could not be loaded: leave it alone.
                    mustWrite = false;
                }
                else if (stored.skutecni_majitele?.Count() != current.skutecni_majitele?.Count())
                {
                    mustWrite = true;
                }
                else
                {
                    // Same number of owners: every new owner must have a field-by-field match
                    // among the stored owners, and that stored match must already carry an osobaId.
                    bool unchanged = current.skutecni_majitele.All(
                        sm => stored.skutecni_majitele.Any(
                            m => m.osoba_jmeno == sm.osoba_jmeno
                                && m.osoba_prijmeni == sm.osoba_prijmeni
                                && m.osoba_datum_narozeni == sm.osoba_datum_narozeni
                                && m.osoba_titul_pred == sm.osoba_titul_pred
                                && m.osoba_titul_za == sm.osoba_titul_za
                                && m.adresa_cast_obce == sm.adresa_cast_obce
                                && m.adresa_cislo_ev == sm.adresa_cislo_ev
                                && m.adresa_cislo_or == sm.adresa_cislo_or
                                && m.adresa_cislo_po == sm.adresa_cislo_po
                                && m.adresa_obec == sm.adresa_obec
                                && m.adresa_okres == sm.adresa_okres
                                && m.adresa_psc == sm.adresa_psc
                                && m.adresa_stat_nazev == sm.adresa_stat_nazev
                                && m.adresa_text == sm.adresa_text
                                && m.adresa_ulice == sm.adresa_ulice
                                && m.slovni_vyjadreni == sm.slovni_vyjadreni
                                && m.podil == sm.podil
                                && m.postaveni == sm.postaveni
                                && !string.IsNullOrEmpty(m.osobaId)));
                    mustWrite = !unchanged;
                }
            }

            if (mustWrite)
            {
                current.UpdateOsobaId();
                ds.AddOrUpdateItem(current, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.rewrite);
            }

            return new Devmasters.Batch.ActionOutputData();
        },
        Devmasters.Batch.Manager.DefaultOutputWriter,
        Devmasters.Batch.Manager.DefaultProgressWriter,
        !System.Diagnostics.Debugger.IsAttached,
        maxDegreeOfParallelism: 4,
        prefix: $"{name} ITEMS ");
}
/// <summary>
/// Entry point of the "Jednání Rady ČT" importer: reads the command line, opens (or creates)
/// the dataset, walks the paged broadcast listing on ceskatelevize.cz and then, for every
/// collected item, either parses and stores it or tries to add a missing audio transcript.
/// </summary>
/// <param name="arguments">
/// Raw command line. <c>/mp3path</c> and <c>/apikey</c> are mandatory; <c>/utdl</c>,
/// <c>/rewrite</c>, <c>/daysback</c>, <c>/ids</c>, <c>/skips2t</c> and <c>/t</c> are optional.
/// </param>
static void Main(string[] arguments)
{
    string banner = $"Jednání-Rady-ČT - {System.Reflection.Assembly.GetEntryAssembly().GetName().Version}";
    Console.WriteLine(banner);
    Devmasters.Logging.Logger.Root.Info(banner);
    Devmasters.Logging.Logger.Root.Debug("Jednání Rady ČT starting with " + string.Join(',', arguments));

    var args = new Devmasters.Args(arguments, new string[] { "/mp3path", "/apikey" });
    if (args.MandatoryPresent() == false)
    {
        Help();
        // Without the mandatory arguments nothing below can work, so stop here.
        return;
    }

    mp3path = args.Get("/mp3path", null);
    startPath = System.IO.Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location);

    // NOTE(review): the switch is spelled "/utdl" although it configures youtube-dl;
    // kept as is because existing callers may rely on this spelling.
    if (args.Exists("/utdl"))
    {
        YTDL = args["/utdl"];
    }
    else
    {
        YTDL = startPath + "\\youtube-dl.exe";
    }

    apiKey = args["/apikey"];
    rewrite = args.Exists("/rewrite");
    afterDay = DateTime.Now.Date.AddDays(-1 * args.GetNumber("/daysback", 10000).Value);
    if (args.Exists("/ids"))
    {
        ids = args.GetArray("/ids");
    }
    skips2t = args.Exists("/skips2t");
    int threads = args.GetNumber("/t") ?? 5;

    try
    {
        ds = HlidacStatu.Api.V2.Dataset.Typed.Dataset<Jednani>.OpenDataset(apiKey, DataSetId);
    }
    catch (ApiException)
    {
        // Opening failed with an API error: fall back to creating the dataset.
        // Any other exception type propagates unchanged.
        ds = HlidacStatu.Api.V2.Dataset.Typed.Dataset<Jednani>.CreateDataset(apiKey, Registration());
    }

    string nextPages = "https://www.ceskatelevize.cz/ivysilani/10000000064-jednani-rady-ceske-televize/dalsi-casti/{0}";
    List<Jednani> jednani = new List<Jednani>();

    // Walk the listing page by page until a page yields no item blocks.
    for (int page = 1; ; page++)
    {
        using (Devmasters.Net.HttpClient.URLContent net = new Devmasters.Net.HttpClient.URLContent(string.Format(nextPages, page)))
        {
            Console.WriteLine($"Page {page}");
            net.IgnoreHttpErrors = true;
            net.Tries = 5;
            net.TimeInMsBetweenTries = 2000;

            string html = "";
            try
            {
                Devmasters.Logging.Logger.Root.Debug($"downloading {net.Url} ");
                html = net.GetContent().Text;
            }
            catch (Exception e)
            {
                // Best effort: a failed page is logged and leaves html empty,
                // which normally ends the paging loop just below.
                Devmasters.Logging.Logger.Root.Error($"{net.Url} failed", e);
            }

            Devmasters.XPath xp = new Devmasters.XPath(html);
            var links = xp.GetNodes("//li[contains(@class,'itemBlock')]");
            if (links == null || links.Count == 0)
            {
                break;
            }

            foreach (var link in links)
            {
                Jednani j = new Jednani();
                j.Odkaz = urlPrefix + Devmasters.XPath.Tools.GetNodeAttributeValue(link, "div/h3/a[@class='itemSetPaging']", "href");
                j.Titulek = Devmasters.XPath.Tools.GetNodeText(link, "div/h3/a[@class='itemSetPaging']").Trim();
                j.DatumJednani = Devmasters.DT.Util.ToDate(Devmasters.XPath.Tools.GetNodeText(link, "div/p").Trim()) ?? DateTime.MinValue;
                j.Id = Devmasters.RegexUtil.GetRegexGroupValue(j.Odkaz, "/ivysilani/10000000064-jednani-rady-ceske-televize/(?<id>\\d{2,})", "id");

                // Keep only items newer than the cut-off and, when /ids was given, only the listed ids.
                if (j.DatumJednani > afterDay && (ids == null || ids.Contains(j.Id)))
                {
                    jednani.Add(j);
                }
            }
        }
    }

    Devmasters.Logging.Logger.Root.Debug($"Starting {jednani.Count} items ");
    Devmasters.Batch.Manager.DoActionForAll<string>(
        jednani.Select(m => m.Id).Reverse(),
        id =>
        {
            // An item whose id could not be extracted from its link cannot be looked up or stored;
            // skip it before touching the dataset API.
            if (string.IsNullOrEmpty(id))
            {
                return new Devmasters.Batch.ActionOutputData() { Log = id };
            }

            bool exists = ds.ItemExists(id);
            if (!exists || rewrite)
            {
                Devmasters.Logging.Logger.Root.Debug($"Start parsing {id} ");
                var fullJ = ParseJednani(jednani.First(m => m.Id == id));
                Devmasters.Logging.Logger.Root.Debug($"Saving {id} ");
                ds.AddOrUpdateItem(fullJ, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.rewrite);
            }
            else
            {
                // Already stored and not rewriting: only fill in a missing audio transcript.
                // NOTE(review): GetItemSafe presumably returns null when the item cannot be
                // loaded - confirm; the null guard is harmless either way.
                var fullJ = ds.GetItemSafe(id);
                if (fullJ != null && !(fullJ.PrepisAudia?.Count() > 0))
                {
                    Devmasters.Logging.Logger.Root.Debug($"Checking AUDIO text {id} ");
                    var aud = Audio(fullJ);
                    if (aud?.Count() > 0)
                    {
                        fullJ.PrepisAudia = aud;
                        ds.AddOrUpdateItem(fullJ, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.rewrite);
                    }
                }
            }

            return new Devmasters.Batch.ActionOutputData() { Log = id };
        },
        true,
        maxDegreeOfParallelism: threads);
}
/// <summary>
/// Entry point of the PSP stenographic-record importer: parses <c>key=value</c> arguments,
/// selects which sessions of the given year to process, enriches each record with the
/// person ids returned by the PoliticiFromText API and stores it in the
/// <c>stenozaznamy-psp</c> dataset. Names made of more than two words are collected and
/// printed at the end.
/// </summary>
/// <param name="args">
/// Arguments in <c>/name=value</c> form. <c>/apikey</c> and <c>/rok</c> are required;
/// <c>/daysback</c> (default 3), <c>/rewrite</c> and <c>/schuze</c> are optional.
/// </param>
static void Main(string[] args)
{
    if (args.Length == 0)
    {
        Help();
        return;
    }

    // Split on the first '=' only, so values that themselves contain '=' stay intact.
    Dictionary<string, string> arguments = args
        .Select(m => m.Split(new[] { '=' }, 2))
        .ToDictionary(m => m[0].ToLower(), v => v.Length == 1 ? "" : v[1]);

    if (!arguments.TryGetValue("/apikey", out apikey))
    {
        Help();
        return;
    }

    string argValue;

    int daysBack = 3;
    if (arguments.TryGetValue("/daysback", out argValue))
    {
        daysBack = Convert.ToInt32(argValue);
    }

    int rok = 0;
    if (arguments.TryGetValue("/rok", out argValue))
    {
        rok = Convert.ToInt32(argValue);
    }
    else
    {
        Help();
        return;
    }

    // The mere presence of /rewrite enables it; its value is ignored.
    bool rewrite = arguments.ContainsKey("/rewrite");

    int? schuze = null;
    if (arguments.TryGetValue("/schuze", out argValue))
    {
        schuze = Convert.ToInt32(argValue);
    }

    const string datasetid = "stenozaznamy-psp";
    dsc = HlidacStatu.Api.V2.Dataset.Typed.Dataset<Steno>.OpenDataset(apikey, datasetid);

    // NOTE(review): neither 'reader' nor 'csv' is ever assigned in this method, so the
    // apikey == "csv" branches below will throw NullReferenceException. The intended
    // output file and CsvWriter setup are not visible here, so this is flagged, not fixed.
    StreamWriter reader = null;
    CsvWriter csv = null;

    HashSet<string> jmena2Check = new HashSet<string>();
    // Guards jmena2Check: the batch callback below may run on several threads
    // (assumes the bool passed to DoActionForAll enables parallel execution - TODO confirm).
    object jmenaLock = new object();

    var vsechnSchuze = ParsePSPWeb.VsechnySchuze(rok);

    List<int> schuzeToParse = new List<int>();
    if (schuze.HasValue)
    {
        schuzeToParse.Add(schuze.Value);
    }
    else if (rewrite)
    {
        schuzeToParse.AddRange(vsechnSchuze.Select(m => m.schuze));
    }
    else
    {
        // Only sessions whose 'last' date falls within the last daysBack days.
        DateTime after = DateTime.Now.Date.AddDays(-1 * daysBack);
        schuzeToParse.AddRange(vsechnSchuze.Where(m => m.last >= after).Select(m => m.schuze));
    }

    Console.WriteLine("Zpracuji schuze " + string.Join(",", schuzeToParse));

    Devmasters.Batch.Manager.DoActionForAll<int>(
        schuzeToParse,
        s =>
        {
            foreach (var item in ParsePSPWeb.ParseSchuze(rok, s))
            {
                try
                {
                    if (rewrite == false && dsc.ItemExists(item.Id))
                    {
                        continue; // already stored, skip
                    }
                }
                catch (Exception)
                {
                    // Deliberate best effort: a failing existence check is treated as
                    // "does not exist" and the item is processed.
                }

                // Names with more than two space-separated parts are collected for manual review.
                if (item.celeJmeno?.Split(' ').Length > 2)
                {
                    lock (jmenaLock)
                    {
                        jmena2Check.Add(item.celeJmeno);
                    }
                }

                // Ask the API which persons are mentioned in the text of the record.
                using (var net = new Devmasters.Net.HttpClient.URLContent($"https://www.hlidacstatu.cz/api/v1/PoliticiFromText?Authorization={apikey}"))
                {
                    net.Method = Devmasters.Net.HttpClient.MethodEnum.POST;
                    net.RequestParams.Form.Add("text", item.text);
                    net.Timeout = 60 * 1000;

                    var sosoby = net.GetContent().Text;
                    var osoby = Newtonsoft.Json.Linq.JArray.Parse(sosoby);
                    if (osoby != null && osoby.Count > 0)
                    {
                        item.politiciZminky = osoby
                            .Select(ja => ja.Value<string>("osobaid"))
                            .Where(o => !string.IsNullOrWhiteSpace(o))
                            .ToArray();
                    }
                }

                if (apikey == "csv")
                {
                    csv.WriteRecord<Steno>(item);
                    csv.NextRecord();
                    if (item.poradi % 10 == 0)
                    {
                        csv.Flush();
                    }
                }
                else
                {
                    SaveItem(item, true);
                }
            }

            return new Devmasters.Batch.ActionOutputData();
        },
        !System.Diagnostics.Debugger.IsAttached);

    if (apikey == "csv")
    {
        csv.Flush();
        csv.Dispose();
        reader.Close();
    }

    Console.WriteLine();
    Console.WriteLine("Podezrela jmena:");
    foreach (var k in jmena2Check)
    {
        Console.WriteLine(k);
    }
}