/// <summary>
/// Builds the audio transcript blocks for a meeting.
/// </summary>
/// <param name="j">Meeting whose <c>Id</c> and <c>Odkaz</c> are passed to the MP3 helper.</param>
/// <returns>
/// One <see cref="Jednani.Blok"/> per block returned by the MP3 helper (start offset in whole
/// seconds plus text), or <c>null</c> when the helper returns no blocks.
/// </returns>
private static Jednani.Blok[] Audio(Jednani j)
{
    var mp3 = new MP3(mp3path, apiKey);
    var blocks = mp3.CheckDownloadAndStartV2TOrGet(DataSetId, j.Id, j.Odkaz);
    if (blocks == null)
    {
        // Keep the original contract: null (not an empty array) signals "no transcript".
        return null;
    }

    // The projection is done exactly once; an identical, unused copy was dropped.
    return blocks
        .Select(t => new Jednani.Blok()
        {
            // Truncates the fractional part of the start offset.
            SekundOdZacatku = (long)t.Start.TotalSeconds,
            Text = t.Text
        })
        .ToArray();
}
/// <summary>
/// Collects the minutes ("zápis") documents that belong to a meeting.
/// The listing page is downloaded on first use and kept in the static <c>hzapis</c> field.
/// </summary>
/// <param name="j">Meeting whose <c>DatumJednani</c> is matched against the link texts.</param>
/// <returns>
/// Documents for every <c>a.pdf</c> link whose text contains the meeting date in parentheses
/// (format <c>d. M. yyyy</c>); an empty array when nothing matches.
/// </returns>
private static Jednani.Dokument[] GetZapisy(Jednani j)
{
    if (hzapis == null)
    {
        using (Devmasters.Net.HttpClient.URLContent nzapis = new Devmasters.Net.HttpClient.URLContent("https://www.ceskatelevize.cz/rada-ct/zapisy-z-jednani/"))
        {
            hzapis = nzapis.GetContent().Text;
        }
    }

    List<Jednani.Dokument> docs = new List<Jednani.Dokument>();

    var dzapis = new Devmasters.XPath(hzapis);
    var zapisy = dzapis.GetNodes("//a[@class='pdf']");
    if (zapisy == null)
    {
        // Other call sites in this file null-check GetNodes results, so do the same here.
        return docs.ToArray();
    }

    string datum = $"({j.DatumJednani.ToString("d. M. yyyy")})";
    foreach (var z in zapisy)
    {
        if (!z.InnerText.Contains(datum))
        {
            continue;
        }

        // Uri.TryCreate reports failure through its return value; a missing or malformed
        // href must not be dereferenced.
        // NOTE(review): assumes Attributes["href"] yields null for a missing attribute — confirm.
        if (!Uri.TryCreate(new Uri(urlPrefix), z.Attributes["href"]?.Value, out Uri? url))
        {
            Devmasters.Logging.Logger.Root.Debug($"GetZapisy: skipping link without usable href for {j.Id} ");
            continue;
        }

        docs.Add(new Jednani.Dokument()
        {
            HsProcessType = "document",
            DocumentUrl = url.AbsoluteUri,
            Nazev = z.InnerText.Trim(),
            Typ = "zápis"
        });
    }

    return docs.ToArray();
}
/// <summary>
/// Collects the material documents discussed at a meeting.
/// The listing page is downloaded on first use and kept in the static <c>mzapis</c> field.
/// </summary>
/// <param name="j">Meeting whose <c>DatumJednani</c> is matched against the section headings.</param>
/// <returns>
/// One document per paragraph (with a direct link child) that follows the first
/// <c>h4.odsazeni</c> heading containing the meeting date (format <c>d. M. yyyy</c>), up to the
/// next <c>h4.odsazeni</c> heading; an empty array when nothing matches.
/// </returns>
private static Jednani.Dokument[] GetMaterialy(Jednani j)
{
    if (mzapis == null)
    {
        using (Devmasters.Net.HttpClient.URLContent nzapis = new Devmasters.Net.HttpClient.URLContent("https://www.ceskatelevize.cz/rada-ct/materialy-projednane-radou/"))
        {
            mzapis = nzapis.GetContent().Text;
        }
    }

    List<Jednani.Dokument> docs = new List<Jednani.Dokument>();

    var dzapis = new Devmasters.XPath(mzapis);
    var casti = dzapis.GetNodes("//div[contains(@class,'contentArticle')]/h4[@class='odsazeni']");
    if (casti == null)
    {
        // Other call sites in this file null-check GetNodes results, so do the same here.
        return docs.ToArray();
    }

    string datum = j.DatumJednani.ToString("d. M. yyyy");
    foreach (var z in casti)
    {
        if (!z.InnerText.Contains(datum))
        {
            continue;
        }

        // Walk everything after the heading until the next section heading.
        var pars = Devmasters.XPath.Tools.GetNodes(z, "following::*");
        if (pars == null)
        {
            continue;
        }

        foreach (var par in pars)
        {
            if (par.Name == "p")
            {
                var link = par.ChildNodes.FirstOrDefault(m => m.Name == "a");
                if (link != null)
                {
                    // Uri.TryCreate reports failure through its return value; a missing or
                    // malformed href must not be dereferenced.
                    // NOTE(review): assumes Attributes["href"] yields null when absent — confirm.
                    if (Uri.TryCreate(new Uri(urlPrefix), link.Attributes["href"]?.Value, out Uri? url))
                    {
                        docs.Add(new Jednani.Dokument()
                        {
                            HsProcessType = "document",
                            DocumentUrl = url.AbsoluteUri,
                            Typ = "material",
                            Nazev = link.InnerText
                        });
                    }
                    else
                    {
                        Devmasters.Logging.Logger.Root.Debug($"GetMaterialy: skipping link without usable href for {j.Id} ");
                    }
                }
            }

            // Next section heading reached: stop. The class attribute is tested explicitly
            // (matching the XPath that selected the headings) rather than whichever attribute
            // happens to come first on the element.
            if (par.Name == "h4" && par.Attributes["class"]?.Value == "odsazeni")
            {
                return docs.ToArray();
            }
        }
    }

    return docs.ToArray();
}
/// <summary>
/// Fills in the details of a meeting from its detail page.
/// </summary>
/// <param name="j">Meeting with <c>Odkaz</c> set; it is updated in place.</param>
/// <returns>The same instance, with <c>Delka</c>, <c>Materialy</c>, <c>Zapisy</c> and <c>PrepisAudia</c> assigned.</returns>
static Jednani ParseJednani(Jednani j)
{
    using (var page = new Devmasters.Net.HttpClient.URLContent(j.Odkaz))
    {
        var xpath = new Devmasters.XPath(page.GetContent().Text);

        // Keep only the digits of the "duration" paragraph before converting to a number.
        var durationText = xpath.GetNodeText("//p[@class='duration']");
        var digitsOnly = Regex.Replace(durationText, "\\D", "");
        j.Delka = Devmasters.TextUtil.ConvertToInt(digitsOnly, 0).Value;

        j.Materialy = GetMaterialy(j);
        j.Zapisy = GetZapisy(j);
        j.PrepisAudia = Audio(j);
    }

    return j;
}
/// <summary>
/// Entry point: reads the command-line options, opens (or creates) the dataset, pages through
/// the programme listing to collect meetings, and then parses and stores each of them in parallel.
/// </summary>
/// <param name="arguments">
/// Command-line arguments. <c>/mp3path</c> and <c>/apikey</c> are mandatory; <c>/utdl</c>,
/// <c>/rewrite</c>, <c>/daysback</c>, <c>/ids</c>, <c>/skips2t</c> and <c>/t</c> are optional.
/// </param>
static void Main(string[] arguments)
{
    string banner = $"Jednání-Rady-ČT - {System.Reflection.Assembly.GetEntryAssembly().GetName().Version}";
    Console.WriteLine(banner);
    Devmasters.Logging.Logger.Root.Info(banner);
    Devmasters.Logging.Logger.Root.Debug("Jednání Rady ČT starting with " + string.Join(',', arguments));

    var args = new Devmasters.Args(arguments, new string[] { "/mp3path", "/apikey" });
    if (args.MandatoryPresent() == false)
    {
        Help();
        // Without the mandatory arguments nothing below can work, so stop here
        // (harmless if Help() already terminates the process).
        return;
    }

    mp3path = args.Get("/mp3path", null);

    startPath = System.IO.Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location);
    if (args.Exists("/utdl"))
    {
        // NOTE(review): the switch is spelled "/utdl" while the field is YTDL — confirm the intended name.
        YTDL = args["/utdl"];
    }
    else
    {
        YTDL = startPath + "\\youtube-dl.exe";
    }

    apiKey = args["/apikey"];
    rewrite = args.Exists("/rewrite");
    afterDay = DateTime.Now.Date.AddDays(-1 * args.GetNumber("/daysback", 10000).Value);
    if (args.Exists("/ids"))
    {
        ids = args.GetArray("/ids");
    }
    skips2t = args.Exists("/skips2t");
    int threads = args.GetNumber("/t") ?? 5;

    // Open the dataset; an ApiException on open is handled by creating it.
    // Any other exception propagates unchanged.
    try
    {
        ds = HlidacStatu.Api.V2.Dataset.Typed.Dataset<Jednani>.OpenDataset(apiKey, DataSetId);
    }
    catch (ApiException)
    {
        ds = HlidacStatu.Api.V2.Dataset.Typed.Dataset<Jednani>.CreateDataset(apiKey, Registration());
    }

    const string nextPages = "https://www.ceskatelevize.cz/ivysilani/10000000064-jednani-rady-ceske-televize/dalsi-casti/{0}";
    List<Jednani> jednani = new List<Jednani>();

    // Page through the listing until a page yields no items (a failed download leaves the
    // HTML empty, which also ends the loop).
    for (int page = 1; ; page++)
    {
        using (Devmasters.Net.HttpClient.URLContent net = new Devmasters.Net.HttpClient.URLContent(string.Format(nextPages, page)))
        {
            Console.WriteLine($"Page {page}");
            net.IgnoreHttpErrors = true;
            net.Tries = 5;
            net.TimeInMsBetweenTries = 2000;

            string html = "";
            try
            {
                Devmasters.Logging.Logger.Root.Debug($"downloading {net.Url} ");
                html = net.GetContent().Text;
            }
            catch (Exception e)
            {
                Devmasters.Logging.Logger.Root.Error($"{net.Url} failed", e);
            }

            Devmasters.XPath xp = new Devmasters.XPath(html);
            var links = xp.GetNodes("//li[contains(@class,'itemBlock')]");
            if (links == null || links.Count == 0)
            {
                break;
            }

            foreach (var link in links)
            {
                Jednani j = new Jednani();
                j.Odkaz = urlPrefix + Devmasters.XPath.Tools.GetNodeAttributeValue(link, "div/h3/a[@class='itemSetPaging']", "href");
                j.Titulek = Devmasters.XPath.Tools.GetNodeText(link, "div/h3/a[@class='itemSetPaging']").Trim();
                j.DatumJednani = Devmasters.DT.Util.ToDate(Devmasters.XPath.Tools.GetNodeText(link, "div/p").Trim()) ?? DateTime.MinValue;
                j.Id = Devmasters.RegexUtil.GetRegexGroupValue(j.Odkaz, "/ivysilani/10000000064-jednani-rady-ceske-televize/(?<id>\\d{2,})", "id");

                // Keep only meetings newer than the cut-off and, when /ids was given, only those ids.
                if (j.DatumJednani > afterDay && (ids == null || ids.Contains(j.Id)))
                {
                    jednani.Add(j);
                }
            }
        }
    }

    Devmasters.Logging.Logger.Root.Debug($"Starting {jednani.Count} items ");
    Devmasters.Batch.Manager.DoActionForAll<string>(jednani.Select(m => m.Id).Reverse(),
        id =>
        {
            if (string.IsNullOrEmpty(id))
            {
                // No id could be extracted from the link; there is nothing to look up or store.
                return new Devmasters.Batch.ActionOutputData() { Log = id };
            }

            bool exists = ds.ItemExists(id);
            if (!exists || rewrite)
            {
                Devmasters.Logging.Logger.Root.Debug($"Start parsing {id} ");
                var fullJ = ParseJednani(jednani.First(m => m.Id == id));
                Devmasters.Logging.Logger.Root.Debug($"Saving {id} ");
                ds.AddOrUpdateItem(fullJ, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.rewrite);
            }
            else
            {
                // Item is already stored: only check whether the audio transcript is still missing.
                var fullJ = ds.GetItemSafe(id);
                if (fullJ != null && !(fullJ.PrepisAudia?.Count() > 0))
                {
                    Devmasters.Logging.Logger.Root.Debug($"Checking AUDIO text {id} ");
                    var aud = Audio(fullJ);
                    if (aud?.Count() > 0)
                    {
                        fullJ.PrepisAudia = aud;
                        ds.AddOrUpdateItem(fullJ, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.rewrite);
                    }
                }
            }

            return new Devmasters.Batch.ActionOutputData() { Log = id };
        }, true, maxDegreeOfParallelism: threads);
}