Exemplo n.º 1
0
        /// <summary>
        /// Builds the audio transcript blocks for a meeting.
        /// </summary>
        /// <param name="j">Meeting whose <c>Id</c> and <c>Odkaz</c> are handed to the MP3 helper.</param>
        /// <returns>
        /// One <see cref="Jednani.Blok"/> per block returned by
        /// <c>MP3.CheckDownloadAndStartV2TOrGet</c> (start offset truncated to whole seconds plus the text),
        /// or <c>null</c> when that call returns <c>null</c>.
        /// </returns>
        private static Jednani.Blok[] Audio(Jednani j)
        {
            var mp3    = new MP3(mp3path, apiKey);
            var blocks = mp3.CheckDownloadAndStartV2TOrGet(DataSetId, j.Id, j.Odkaz);

            if (blocks == null)
            {
                // null (not an empty array) is kept as the "nothing available" result;
                // the caller in Main null-checks the returned value.
                return null;
            }

            // The projection is materialized exactly once; the previous duplicate,
            // never-read copy of this array has been removed.
            return blocks
                   .Select(t => new Jednani.Blok()
                   {
                       SekundOdZacatku = (long)t.Start.TotalSeconds,
                       Text            = t.Text
                   })
                   .ToArray();
        }
Exemplo n.º 2
0
        /// <summary>
        /// Collects the meeting-minutes documents ("zápis") that belong to the given meeting.
        /// </summary>
        /// <remarks>
        /// The listing page is downloaded once and cached in the static <c>hzapis</c> field.
        /// Every <c>a.pdf</c> link whose text contains the meeting date in the form
        /// <c>(d. M. yyyy)</c> is turned into a document. Links without a usable <c>href</c>
        /// are logged and skipped instead of causing a <see cref="NullReferenceException"/>.
        /// </remarks>
        /// <param name="j">Meeting whose <c>DatumJednani</c> is used to pick the matching links.</param>
        /// <returns>Matching documents; an empty array when none are found.</returns>
        private static Jednani.Dokument[] GetZapisy(Jednani j)
        {
            if (hzapis == null)
            {
                using (Devmasters.Net.HttpClient.URLContent nzapis = new Devmasters.Net.HttpClient.URLContent("https://www.ceskatelevize.cz/rada-ct/zapisy-z-jednani/"))
                {
                    hzapis = nzapis.GetContent().Text;
                }
            }
            var dzapis = new Devmasters.XPath(hzapis);

            var zapisy = dzapis.GetNodes("//a[@class='pdf']");

            List<Jednani.Dokument> docs = new List<Jednani.Dokument>();

            // GetNodes results are null-checked elsewhere in this file (see Main), so do the same here.
            if (zapisy == null)
            {
                return docs.ToArray();
            }

            // Loop-invariant: the date label the link text must contain, parentheses included.
            string dateLabel = $"({j.DatumJednani.ToString("d. M. yyyy")})";

            foreach (var z in zapisy)
            {
                if (!z.InnerText.Contains(dateLabel))
                {
                    continue;
                }

                string href = z.Attributes["href"]?.Value;

                // TryCreate reports failure through its return value; on failure the out value is null,
                // so it must be checked before AbsoluteUri is read.
                if (!Uri.TryCreate(new Uri(urlPrefix), href, out Uri? url) || url == null)
                {
                    Devmasters.Logging.Logger.Root.Info($"GetZapisy: skipping link with invalid href '{href}'");
                    continue;
                }

                docs.Add(new Jednani.Dokument()
                {
                    HsProcessType = "document",
                    DocumentUrl   = url.AbsoluteUri,
                    Nazev         = z.InnerText.Trim(),
                    Typ           = "zápis"
                });
            }

            return docs.ToArray();
        }
Exemplo n.º 3
0
        /// <summary>
        /// Collects the documents ("material") listed under the section of the given meeting.
        /// </summary>
        /// <remarks>
        /// The listing page is downloaded once and cached in the static <c>mzapis</c> field.
        /// The section is the <c>h4.odsazeni</c> heading whose text contains the meeting date
        /// (<c>d. M. yyyy</c>). The elements following that heading are scanned; the first
        /// <c>a</c> child of every <c>p</c> becomes a document, and the scan ends at the next
        /// <c>h4</c> whose first attribute value is <c>odsazeni</c>.
        /// The date must not be preceded by a digit, so that e.g. "1. 1. 2020" does not
        /// match a heading for "11. 1. 2020" or "21. 1. 2020".
        /// Links without a usable <c>href</c> are logged and skipped.
        /// </remarks>
        /// <param name="j">Meeting whose <c>DatumJednani</c> selects the section.</param>
        /// <returns>Documents of the matching section; an empty array when none are found.</returns>
        private static Jednani.Dokument[] GetMaterialy(Jednani j)
        {
            if (mzapis == null)
            {
                using (Devmasters.Net.HttpClient.URLContent nzapis = new Devmasters.Net.HttpClient.URLContent("https://www.ceskatelevize.cz/rada-ct/materialy-projednane-radou/"))
                {
                    mzapis = nzapis.GetContent().Text;
                }
            }
            var dzapis = new Devmasters.XPath(mzapis);

            var casti = dzapis.GetNodes("//div[contains(@class,'contentArticle')]/h4[@class='odsazeni']");

            List<Jednani.Dokument> docs = new List<Jednani.Dokument>();

            // GetNodes results are null-checked elsewhere in this file (see Main), so do the same here.
            if (casti == null)
            {
                return docs.ToArray();
            }

            // Same date text as before, but anchored so a leading digit cannot be part of the day
            // (a plain Contains would let "1. 1. 2020" match inside "31. 1. 2020").
            string datePattern = @"(?<!\d)" + Regex.Escape(j.DatumJednani.ToString("d. M. yyyy"));

            foreach (var z in casti)
            {
                if (!Regex.IsMatch(z.InnerText, datePattern))
                {
                    continue;
                }

                var pars = Devmasters.XPath.Tools.GetNodes(z, "following::*");
                if (pars == null)
                {
                    continue;
                }

                // Walk forward until the next section heading.
                foreach (var par in pars)
                {
                    if (par.Name == "h4" && par.Attributes.FirstOrDefault()?.Value == "odsazeni")
                    {
                        // Next section reached: the whole search is finished.
                        return docs.ToArray();
                    }

                    if (par.Name != "p")
                    {
                        continue;
                    }

                    var link = par.ChildNodes.FirstOrDefault(m => m.Name == "a");
                    if (link == null)
                    {
                        continue;
                    }

                    string href = link.Attributes["href"]?.Value;

                    // TryCreate reports failure through its return value; on failure the out value is null,
                    // so it must be checked before AbsoluteUri is read.
                    if (!Uri.TryCreate(new Uri(urlPrefix), href, out Uri? url) || url == null)
                    {
                        Devmasters.Logging.Logger.Root.Info($"GetMaterialy: skipping link with invalid href '{href}'");
                        continue;
                    }

                    docs.Add(new Jednani.Dokument()
                    {
                        HsProcessType = "document",
                        DocumentUrl   = url.AbsoluteUri,
                        Typ           = "material",
                        Nazev         = link.InnerText
                    });
                }
            }

            return docs.ToArray();
        }
Exemplo n.º 4
0
        /// <summary>
        /// Fills in the detail fields of a meeting from its own page and the related listings.
        /// </summary>
        /// <remarks>
        /// Downloads <c>j.Odkaz</c>, reads the text of <c>p.duration</c>, strips every non-digit
        /// character and stores the resulting number in <c>Delka</c> (0 is the conversion default).
        /// Then populates <c>Materialy</c>, <c>Zapisy</c> and <c>PrepisAudia</c> via
        /// <c>GetMaterialy</c>, <c>GetZapisy</c> and <c>Audio</c>, in that order.
        /// </remarks>
        /// <param name="j">Meeting to complete; it is modified in place.</param>
        /// <returns>The same <paramref name="j"/> instance.</returns>
        static Jednani ParseJednani(Jednani j)
        {
            using (var detailPage = new Devmasters.Net.HttpClient.URLContent(j.Odkaz))
            {
                var parsed = new Devmasters.XPath(detailPage.GetContent().Text);

                // Keep only the digits of the duration label before converting.
                var durationLabel  = parsed.GetNodeText("//p[@class='duration']");
                var durationDigits = Regex.Replace(durationLabel, "\\D", "");
                j.Delka = Devmasters.TextUtil.ConvertToInt(durationDigits, 0).Value;

                j.Materialy   = GetMaterialy(j);
                j.Zapisy      = GetZapisy(j);
                j.PrepisAudia = Audio(j);
            }

            return j;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Entry point: reads the command line, opens (or creates) the dataset, crawls the paged
        /// list of meetings and then parses and stores each selected meeting.
        /// </summary>
        /// <remarks>
        /// Switches read here: <c>/mp3path</c> and <c>/apikey</c> (mandatory), <c>/utdl</c>,
        /// <c>/rewrite</c>, <c>/daysback</c> (default 10000), <c>/ids</c>, <c>/skips2t</c> and
        /// <c>/t</c> (degree of parallelism, default 5).
        /// When a mandatory switch is missing, <c>Help()</c> is called and the method returns.
        /// </remarks>
        /// <param name="arguments">Raw command-line arguments.</param>
        static void Main(string[] arguments)
        {
            Console.WriteLine($"Jednání-Rady-ČT - {System.Reflection.Assembly.GetEntryAssembly().GetName().Version}");
            Devmasters.Logging.Logger.Root.Info($"Jednání-Rady-ČT - {System.Reflection.Assembly.GetEntryAssembly().GetName().Version}");
            Devmasters.Logging.Logger.Root.Debug("Jednání Rady ČT starting with " + string.Join(',', arguments));


            var args = new Devmasters.Args(arguments, new string[] { "/mp3path", "/apikey" });

            if (args.MandatoryPresent() == false)
            {
                Help();
                // Nothing below can work without the mandatory switches, so stop here.
                return;
            }

            mp3path = args.Get("/mp3path", null);

            if (args.Exists("/utdl"))
            {
                YTDL = args["/utdl"];
            }
            else
            {
                YTDL = System.IO.Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location) + "\\youtube-dl.exe";
            }

            startPath = System.IO.Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location);

            apiKey   = args["/apikey"];
            rewrite  = args.Exists("/rewrite");
            afterDay = DateTime.Now.Date.AddDays(-1 * args.GetNumber("/daysback", 10000).Value);
            if (args.Exists("/ids"))
            {
                ids = args.GetArray("/ids");
            }
            skips2t = args.Exists("/skips2t");

            int threads = args.GetNumber("/t") ?? 5;

            try
            {
                ds = HlidacStatu.Api.V2.Dataset.Typed.Dataset<Jednani>.OpenDataset(apiKey, DataSetId);
            }
            catch (ApiException)
            {
                // Opening failed with an API error: register the dataset instead.
                // Any other exception type propagates unchanged.
                ds = HlidacStatu.Api.V2.Dataset.Typed.Dataset<Jednani>.CreateDataset(apiKey, Registration());
            }

            string nextPages = "https://www.ceskatelevize.cz/ivysilani/10000000064-jednani-rady-ceske-televize/dalsi-casti/{0}";

            int           page    = 0;
            List<Jednani> jednani = new List<Jednani>();

            // Pages are fetched one by one until a page yields no item blocks
            // (which is also what happens when the download fails and html stays empty).
            while (true)
            {
                page++;
                using (Devmasters.Net.HttpClient.URLContent net = new Devmasters.Net.HttpClient.URLContent(string.Format(nextPages, page)))
                {
                    Console.WriteLine($"Page {page}");
                    net.IgnoreHttpErrors     = true;
                    net.Tries                = 5;
                    net.TimeInMsBetweenTries = 2000;
                    string html = "";
                    try
                    {
                        Devmasters.Logging.Logger.Root.Debug($"downloading {net.Url} ");
                        html = net.GetContent().Text;
                    }
                    catch (Exception e)
                    {
                        Devmasters.Logging.Logger.Root.Error($"{net.Url} failed", e);
                    }

                    Devmasters.XPath xp = new Devmasters.XPath(html);
                    var links           = xp.GetNodes("//li[contains(@class,'itemBlock')]");
                    if (links == null || links.Count == 0)
                    {
                        break;
                    }

                    foreach (var link in links)
                    {
                        Jednani j = new Jednani();
                        j.Odkaz        = urlPrefix + Devmasters.XPath.Tools.GetNodeAttributeValue(link, "div/h3/a[@class='itemSetPaging']", "href");
                        j.Titulek      = Devmasters.XPath.Tools.GetNodeText(link, "div/h3/a[@class='itemSetPaging']").Trim();
                        j.DatumJednani = Devmasters.DT.Util.ToDate(Devmasters.XPath.Tools.GetNodeText(link, "div/p").Trim()) ?? DateTime.MinValue;
                        j.Id           = Devmasters.RegexUtil.GetRegexGroupValue(j.Odkaz, "/ivysilani/10000000064-jednani-rady-ceske-televize/(?<id>\\d{2,})", "id");

                        // Keep only meetings newer than the cut-off and, when /ids was given, only those ids.
                        if (j.DatumJednani > afterDay &&
                            (ids == null || ids.Contains(j.Id)))
                        {
                            jednani.Add(j);
                        }
                    }
                }
            }

            Devmasters.Logging.Logger.Root.Debug($"Starting {jednani.Count} items ");

            Devmasters.Batch.Manager.DoActionForAll<string>(jednani.Select(m => m.Id).Reverse(),
                                                            id =>
            {
                // An item without an id cannot be looked up or stored; skip it before touching the dataset.
                if (!string.IsNullOrEmpty(id))
                {
                    bool exists = ds.ItemExists(id);
                    if (!exists || rewrite)
                    {
                        Devmasters.Logging.Logger.Root.Debug($"Start parsing {id} ");
                        var fullJ = ParseJednani(jednani.First(m => m.Id == id));

                        Devmasters.Logging.Logger.Root.Debug($"Saving {id} ");
                        ds.AddOrUpdateItem(fullJ, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.rewrite);
                    }
                    else
                    {
                        // Item already stored and no rewrite requested: only fill in a missing audio transcript.
                        var fullJ = ds.GetItemSafe(id);

                        // The stored item may fail to load; without it there is nothing to update.
                        if (fullJ != null && !(fullJ.PrepisAudia?.Count() > 0))
                        {
                            Devmasters.Logging.Logger.Root.Debug($"Checking AUDIO text {id} ");
                            var aud = Audio(fullJ);
                            if (aud != null && aud.Length > 0)
                            {
                                fullJ.PrepisAudia = aud;
                                ds.AddOrUpdateItem(fullJ, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.rewrite);
                            }
                        }
                    }
                }

                return new Devmasters.Batch.ActionOutputData()
                {
                    Log = id
                };
            }, true, maxDegreeOfParallelism: threads);
        }